Contents

1 Introduction

This package contains the data of bioenergetic features of 140 CLL samples and 9 B cell samples, measured by Seahorse extracellular flux assay. This package also reproduces figures presented in the paper “Energy metabolism is co-determined by genetic variants in chronic lymphocytic leukemia and influences drug sensitivity” by Lu J, Böttcher M et al.


In this vignette we present the integrative analysis of CLL bioenergetic data set and the source code for the paper.

This vignette is build from the sub-vignettes, which each can be build separatelly. The parts are separated by the horizontal lines. Each part finishes with removal of all the created objects.

library(seahorseCLL)
library(BloodCancerMultiOmics2017)
library(SummarizedExperiment)
library(ggbeeswarm)
library(xtable)
library(cowplot)
library(piano)
library(gridExtra)
library(grid)
library(genefilter)
library(pheatmap)
library(ggrepel)
library(GEOquery)
library(limma)
library(robustbase)
library(DESeq2)
library(survival)
library(maxstat)
library(survminer)
library(glmnet)
library(RColorBrewer)
library(reshape2)
library(gtable)
library(Biobase)
library(tidyverse)

2 Comparison between normal B cell and CLL cells

2.1 A global view of bioenergetic features in CLL and normal B-cells

Load data

data("seaBcell", "seaOri", "patmeta")

Combine the two data sets to one matrix

stopifnot(rownames(seaBcell) == rownames(seaOri))
seaOri$diagnosis <- patmeta[colnames(seaOri),]$Diagnosis
seaOri <- seaOri[,seaOri$diagnosis == "CLL"] # choose CLL samples

seaMat <- cbind(assays(seaBcell)$seaMedian, assays(seaOri)$seaMedian)
seaBatch <- rbind(colData(seaBcell)[,"dateMST", drop= FALSE], colData(seaOri)[,"dateMST", drop = FALSE])

#remove samples that contain NA values
seaMat <- seaMat[,complete.cases(t(seaMat))]

Principal component analysis

resPC <- prcomp(t(seaMat), center = TRUE, scale. = TRUE)
varExp <- resPC$sdev^2/sum(resPC$sdev^2)

Plot the first two principal components

#define color
colorList <- c(`B cell` = "#FF3030", CLL= "#1E90FF")

plotTab <- data.frame(resPC$x[,c(1,2)])
plotTab$type <- c(rep("B cell", ncol(seaBcell)), rep("CLL", nrow(plotTab)- ncol(seaBcell))) #10 normal b cell samples

pcaPlot <- ggplot(plotTab, aes(x=PC1, y=PC2, color = type)) + geom_point(size=3) + 
  xlab(sprintf("PC1 (%2.1f%s)",varExp[1]*100,"%")) + ylab(sprintf("PC2 (%2.1f%s)", varExp[2]*100, "%")) + 
  theme_bw() + theme(legend.position = c(0.9,0.9), legend.title = element_blank(), 
                     legend.background = element_rect(color="grey"),
                     axis.text = element_text(size =18),
                     axis.title = element_text(size = 20)) +
  scale_color_manual(values = colorList) + coord_cartesian(xlim = c(-5.5,5.5), ylim = c(-5.5,5.5))
pcaPlot

2.2 Associations between cell type and individual metabolic features

Prepare table for hypothesis test

seaMat <- data.frame(cbind(assays(seaBcell)$seaMedian, assays(seaOri)$seaMedian)) %>% rownames_to_column(var = "measure")
seaTab <- gather(seaMat, key = "patientID", value = "value", -measure) %>% 
  mutate(type = ifelse(substr(patientID, 1, 1) == "K", "B cell", "CLL")) %>% mutate(type = factor(type)) %>%
  mutate(batch = as.factor(seaBatch[patientID, ]))

t-test for each measurment

pTab <- group_by(seaTab, measure) %>% do((function(x) {
  res <- t.test(value ~ type, x, equal.var = TRUE)
  data.frame(p = res$p.value,
             diff = res$estimate[[2]] - res$estimate[[1]])
}) (.))
pTab$p.adj <- p.adjust(pTab$p, method = "BH")

ANOVA-test for each measurment (accounting for batch effect)

pTab.aov <- group_by(seaTab, measure) %>% do((function(x) {
  res <- summary(aov(value ~ type + batch, x))
  data.frame(p = res[[1]][["Pr(>F)"]][1])
}) (.))
pTab.aov$p.adj <- p.adjust(pTab.aov$p, method = "BH")

Expor the table to LaTex format using xtable

expTab <- pTab %>% ungroup() %>% mutate(measure = gsub("\\."," ",measure)) %>%
  rename("Seahorse measurement" = measure, "Difference of mean" = diff) %>%
  mutate(p = pTab.aov$p,  `adjusted p` = pTab.aov$p.adj) %>%
  select(-p.adj)
expTab[expTab$`Seahorse measurement` == "ECAR OCR ratio", 1] <- "ECAR/OCR"
fileConn <- file(paste0(plotDir,"tTest_BcellVSCLL.tex"))
writeLines(print(xtable(expTab, digits = 3, 
             caption = "ANOVA test results (adjusted for batch effect) of bioenergetic features between CLL cells and normal B cells "), 
      include.rownames=FALSE,
      caption.placement = "top"), fileConn)
close(fileConn)

Beeswarms plot for select measurement

measureList <- c("basal.respiration","glycolysis","ATP.production","glycolytic.capacity","maximal.respiration","glycolytic.reserve")
gList <- lapply(measureList, function(seaName) {
  
  plotTab <- filter(seaTab, measure == seaName)
  pval <- filter(pTab.aov, measure == seaName )$p
  
    
  #unit y (add unit to y axis, based on the type of measurement)
  if (seaName %in% c("basal.respiration","ATP.production","maximal.respiration")) {
    yLab <- "OCR (pMol/min)"
  } else yLab <- "ECAR (pMol/min)"
    
  #replace the "." in the mearement name with space
  seaName <- gsub("\\."," ",seaName)

  
  #plot title
  #plotTitle <- sprintf("p value = %s", format(pval, digits = 2, scientific = TRUE))
  plotTitle <- paste(sprintf("'%s (p = '~",seaName),
                     sciPretty(pval, digits = 2),"*')'")
  

  ggplot(plotTab, aes(x=type, y = value)) + 
       stat_boxplot(geom = "errorbar", width = 0.3) +
       geom_boxplot(outlier.shape = NA, col="black", width=0.4) + 
       geom_beeswarm(cex=2, size =0.5, aes(col = type)) + theme_classic() +
       xlab("") + ylab(yLab) + ggtitle(parse(text = plotTitle)) +
      theme(axis.line.x = element_blank(), axis.ticks.x = element_blank(),
             axis.title.y = element_text(size=10, face="bold"),
             axis.text = element_text(size=11),
             plot.title = element_text(size = 12, hjust=0.5),
             legend.position = "none",
             axis.text.x = element_text(face="bold",size=12)) +
    scale_color_manual(values = colorList)
  
})
beePlot <- grid.arrange(grobs = gList, ncol=2)

Combine the PCA plot and beeswarm plots (Figure 1)

title = ggdraw() + draw_figure_label("Figure 1", fontface = "bold", position = "top.left",size=20)
p <- plot_grid(pcaPlot, beePlot, labels= c("A","B"), rel_widths = c(1,1), label_size = 22)
plot_grid(title, p, rel_heights = c(0.05,0.95), ncol = 1)


3 Impact of genetic heterogeneity on energy metabolism of CLL

3.1 Data pre-processing

Load data

data("lpdAll", "seaOri","seaCombat", "patmeta", "mutCOM")

Subsetting

#overlap between the patient samples in seahorse dataset and main screen dataset
lpdCLL <- lpdAll[,pData(lpdAll)$Diagnosis %in% "CLL"]
seaMain <- intersect(colnames(seaOri),colnames(lpdCLL))
length(seaMain)
## [1] 140
#CLL seahorse data
seaSub <- t(assays(seaOri[,seaMain])$seaMedian)

Get patient genetic background

#extract genetic background information
genBack <- Biobase::exprs(lpdCLL)[fData(lpdCLL)$type %in%
                          c("gen","Methylation_Cluster","IGHV"), seaMain]  

genBack <- genBack[! rownames(genBack) %in% 
                     c("del13q14_bi","del13q14_mono"),]

genBack <- data.frame(t(genBack))
colnames(genBack) <- replace(colnames(genBack), colnames(genBack) == "del13q14_any", "del13q14")

3.2 Hypothesis testing: genetic variants vs bioenergetic features

Pre-processing data for testing

#get genetic background information
geneSub <- t(genBack[seaMain,])

seaTest <- t(seaSub)

geneTab <- data.frame(geneSub) %>% rownames_to_column("Variant") %>%
  gather(key = "patID",value = "status", -Variant)
seaTab <- data.frame(seaSub) %>% rownames_to_column("patID") %>%
  gather(key = "Measurement", value = "value", -patID)

testTab <- left_join(seaTab, geneTab, by = "patID")

Perform ANOVA-test, including batch as a co-variate.

aovTest <- function(value, status, batch) {
  eachTab <- tibble(value, status, batch) %>%
    filter(!is.na(value), !is.na(status))
  if (nrow(eachTab) >=10 & sum(eachTab$status != 0) >= 5) {
     res <- summary(aov(value ~ factor(status) + factor(batch), data=eachTab))
     mdTab <- group_by(eachTab, status) %>%
       summarise(mm = mean(value)) %>% arrange(status)
     data.frame(P.raw = res[[1]][5][1,], dm = mdTab$mm[nrow(mdTab)] - mdTab$mm[1])
  } else {
     data.frame(P.raw = NA, dm = NA)
  }
}

testTab <- mutate(testTab, batch = factor(colData(seaOri)[patID,]$dateMST))

p.tab <- group_by(testTab, Measurement, Variant) %>%
  do(aovTest(.$value, .$status, .$batch)) %>%
  ungroup() %>% filter(!is.na(P.raw)) %>%
  mutate(P.adj = p.adjust(P.raw, method = "BH"))

hist(p.tab$P.raw, breaks=20, col= "lightgreen", 
     main = "Seahorse VS genetics", xlab = "raw P values")

Export a table of all tested associations

#pCut <- 0.01

tabOut <- filter(p.tab, P.adj <= 1) %>% mutate(Measurement = formatSea(Measurement)) %>%
  mutate(Variant = ifelse(Variant == "IGHV.Uppsala.U.M", "IGHV status", Variant)) %>%
  select(Measurement, Variant, P.raw, dm, P.adj)

colnames(tabOut) <- c("Seahorse measurment", "Genetic variant", "p value", "Difference of mean", "adjusted p value")

write(print(xtable(tabOut, digits = 3, 
             caption = "ANOVA test results (adjusted for batch effect) of energy metabolic measurements related to different genetic variants"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = paste0(plotDir,"tTest_SeahorseVSgene.tex"))

Scatter plot of p values

#prepare table for plot
plotTab <- p.tab %>% mutate(type = ifelse(P.adj > 0.05, "not significant", 
                                          ifelse(dm >0, "higher","lower"))) %>%
  mutate(Variant = ifelse(Variant == "IGHV.Uppsala.U.M", "IGHV status", Variant),
         Measurement = formatSea(Measurement)) %>%
  mutate(varName = ifelse(type == "not significant","",Variant))

#define color
#colList <- c("#c0508a", "#79c858", "#7e45b9", "#c0ad52",
#             "#8c8bbd", "#c35c41", "#86bca8", "#4b3e3a","grey80")
colList <- c(`not significant` = "grey80", higher = "firebrick", lower = "darkblue")
#fdr cut-off
fdrCut <- max(filter(plotTab, type != "not significant")$P.raw)
pos = position_jitter(width = 0.15, seed = 10)
p <- ggplot(data=plotTab, aes(x= Measurement, y=-log10(P.raw),
                              col=type, label = varName))+ 
  geom_text_repel(position = pos, color = "black", size= 4, force = 3) +
  geom_hline(yintercept = -log10(fdrCut), linetype="dotted", color = "grey20") + 
  geom_point(size=3, position = pos) + 
  ylab(expression(-log[10]*'('*p~value*')')) + xlab("Seahore measurements") +
  theme_bw() + scale_color_manual(values = colList) +
  theme(axis.text.x = element_text(vjust=0.5, hjust = 1,size=15),
        axis.text.y = element_text(size=15),
        axis.title = element_text(size =18),
        legend.position = "none") +
  annotate(geom = "text", x = 0.6, y = -log10(fdrCut) + 0.5, label = "5% FDR") +
  coord_flip()
plot(p)

Plot all significant associations as beeswarm plot

p.tab.sig <- p.tab[p.tab$P.adj < 0.05,]

#batch effect corrected values should be used for plot
seaPlot <- assays(seaCombat)$seaMedian
seaPlot <- seaPlot[rownames(seaTest), colnames(seaTest)]

gList <- lapply(seq(1,nrow(p.tab.sig)), function(i) {
  seaName <- p.tab.sig[i,]$Measurement
  geneName <- p.tab.sig[i,]$Variant
  pval <- p.tab.sig[i,]$P.raw
  
  plotTab <- tibble(Measurement = seaPlot[seaName,], Mutation = geneSub[geneName,]) %>% 
    filter(!is.na(Measurement), !is.na(Mutation))
  #for IGHV
  if (geneName == "IGHV.Uppsala.U.M") {
    geneName <- "IGHV status"
    plotTab <- mutate(plotTab, 
                      Status = ifelse(Mutation == 0, 
                                      sprintf("unmutated\n(n=%s)", sum(Mutation == 0)), 
                                      sprintf("mutated\n(n=%s)", sum(Mutation == 1))))
  } else if (geneName == "Methylation_Cluster") {
    plotTab <- mutate(plotTab,
                      Status = ifelse(Mutation == 0,sprintf("LP\n(n=%s)", sum(Mutation == 0)),
                                      ifelse(Mutation == 1, sprintf("IP\n(n=%s)", sum(Mutation == 1)),
                                             sprintf("HP\n(n=%s)", sum(Mutation == 2)))))
  } else plotTab <- mutate(plotTab, 
                           Status = ifelse(Mutation == 0,
                                          sprintf("wild type\n(n=%s)", sum(Mutation == 0)), 
                                          sprintf("mutated\n(n=%s)", sum(Mutation == 1))))
  
  #reverse label factor (wildtype always on the rigt)
  plotTab <- mutate(plotTab, Status = factor(Status)) %>% 
    mutate(Status = factor(Status, levels = rev(levels(Status))))
  
  # color scheme, black for wildtype, red for mutated
  if (length(levels(plotTab$Status)) == 2) {
    colorList <- c("black","red")
    names(colorList) <- levels(plotTab$Status)
  } else {
    colorList <- c("lightblue","blue", "darkblue")
    names(colorList) <- levels(plotTab$Status)
  }
  
  #set y label
  if (seaName %in% c("ECAR.OCR.ration")) {
    yLab = "ECAR/OCR"
  } else if (seaName %in% c("maximal.respiration","spare.respiratory.capacity","basal.respiration",
                            "ATP.production","OCR","proton.leak")) {
    yLab = "OCR (pMol/min)" } else yLab = "ECAR (pMol/min)"
  
  #replace the "." in the mearement name with space
  seaName <- formatSea(seaName)
  
  #plot title
  plotTitle <- paste(sprintf("'%s ~ %s (p = '~",seaName,geneName),
                     sciPretty(pval, digits = 2),"*')'")
  
  ggplot(plotTab, aes(x=Status, y = Measurement)) + 
      stat_boxplot(geom = "errorbar", width = 0.3) +
      geom_boxplot(outlier.shape = NA, col="black", width=0.4) + 
      geom_beeswarm(cex=2, size =1, aes(col = Status)) + theme_classic() +
      xlab("") + ylab(yLab) + ggtitle(parse(text=plotTitle)) + 
      scale_color_manual(values = colorList) +
      theme(axis.line.x = element_blank(), axis.ticks.x = element_blank(),
             axis.title = element_text(size=18, face="bold"),
             axis.text = element_text(size=18),
             plot.title = element_text(hjust=0.5,size=13),
             legend.position = "none",
             axis.title.x = element_text(face="bold"))
  
})
names(gList) <- paste0(p.tab.sig$Measurement, "_",p.tab.sig$Variant)

Combine p value scatter plot and beeswarm plots (Figure 2)

title = ggdraw() + draw_figure_label("Figure 2", fontface = "bold", position = "top.left",size=22)
pout <- ggdraw() +
  draw_plot(p, 0, 0, 0.58, 1) +
  draw_plot(gList$glycolysis_IGHV.Uppsala.U.M, 0.6, 0.5 , .4, .48) +
  draw_plot(gList$glycolysis_Methylation_Cluster, 0.6, 0 , .4, .48) +
  draw_plot_label(c("A", "B", "C"), 
                  c(0, 0.6, 0.6), c(1, 1, 0.5), size = 20)
plot_grid(title, pout, rel_heights = c(0.05,0.95), ncol = 1)

Plot the rest in a separate pdf file (For supplementary figure)

gList$glycolysis_IGHV.Uppsala.U.M <- NULL
gList$glycolysis_Methylation_Cluster <- NULL
grid.arrange(grobs = gList, ncol=2)

4 Considering pretreatment status in a multi-variate model

data("pretreat")
testTab.pretreat <- testTab %>% mutate(treat = pretreat[patID,]) %>%
  filter(paste0(Measurement,Variant) %in% paste0(p.tab$Measurement, p.tab$Variant))

Perform ANOVA-test, including both batch and pretreatment status as a co-variate.

aovTest.pretreat <- function(value, status, batch, treat) {
  eachTab <- tibble(value, status, batch,treat) %>%
    filter(!is.na(value), !is.na(status),!is.na(treat))
  if (nrow(eachTab) >=10 & sum(eachTab$status != 0) >= 5) {
     res <- summary(aov(value ~ factor(status) * factor(treat) + factor(batch)  , data=eachTab))
     mdTab <- group_by(eachTab, status) %>%
       summarise(mm = mean(value)) %>% arrange(status)
     data.frame(P.raw = res[[1]][5][1,], dm = mdTab$mm[nrow(mdTab)] - mdTab$mm[1], 
                P.inter = res[[1]][5][4,])
  } else {
     data.frame(P.raw = NA, dm = NA)
  }
}

testTab <- mutate(testTab, batch = factor(colData(seaOri)[patID,]$dateMST))

p.tab.pretreat <- group_by(testTab.pretreat, Measurement, Variant) %>%
  do(aovTest.pretreat(.$value, .$status, .$batch,.$treat)) %>%
  ungroup() %>% filter(!is.na(P.raw)) %>%
  mutate(P.adj = p.adjust(P.raw, method = "BH"),
         P.inter.adj = p.adjust(P.inter, method = "BH"))

hist(p.tab.pretreat$P.raw, breaks=20, col= "lightgreen", 
     main = "Seahorse VS genetics", xlab = "raw P values")

4.1 Plot of p value comparison

pTab.compare <- left_join(select(p.tab, Measurement, Variant, P.raw, P.adj) %>% dplyr::rename(p.all = P.raw, p.adj.all = P.adj),
                          select(p.tab.pretreat, Measurement, Variant, P.raw, P.adj) %>% dplyr::rename(p.naive = P.raw,
                                                                                                p.adj.naive = P.adj),
                          by = c("Measurement","Variant")) %>%
  mutate(sigGroup = ifelse( p.adj.all > 0.05 & p.adj.naive > 0.05, "Below 5% FDR in both models",
                            ifelse(p.adj.all <= 0.05 & p.adj.naive > 0.05, "Significant without pretreatment in the model", 
                            ifelse(p.adj.all > 0.05 & p.adj.naive <= 0.05, "Significant with pretreatment accounted",
                                                                                                                                   "Significant in both models"))))

colorList <- c(`Below 5% FDR in both models` = "grey70",
               `Significant in both models` = "#E41A1C",
               `Significant without pretreatment in the model` = "#377EB8",
               `Significant with pretreatment accounted` = "#984EA3")
ggplot(pTab.compare, aes(x=-log10(p.all), y = -log10(p.naive), color = sigGroup)) + 
  geom_point() +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dotted") +
  theme_bw() + ylab(expression('-log'[10]*'P, accounting for pretreatment')) +
  xlab(expression('-log'[10]*'P, pretreatment not considered')) + 
  ggtitle("Bioenergetic features ~ genetic variants") +
  scale_color_manual(values = colorList, name = "Statistical significance") +
  theme(plot.title = element_text(face = "bold", hjust =0.5,size=15), 
        legend.position = c(0.65,0.15),
        legend.background = element_rect(colour = "black"),
        legend.text = element_text(size =10),
        legend.title = element_text(size = 12),
        axis.text = element_text(size =13),
        axis.title = element_text(size =14)) 


5 Associations between drug response phenotype and energy metabolism of CLL

5.1 Data pre-processing

Load data set

data("lpdAll","drugs", "patmeta", "seaCombat", "conctab")

Sample subsetting

#get drug response data for CLL samples only
lpdCLL <- lpdAll[fData(lpdAll)$type == "viab",pData(lpdAll)$Diagnosis == "CLL"]

#get overlapped samples
sampleOverlap <- intersect(colnames(lpdCLL), colnames(seaCombat))
seaSub <- seaCombat[,sampleOverlap]
lpdCLL <- lpdCLL[,sampleOverlap]

How many overlapped samples?

length(sampleOverlap)
## [1] 140

Remove bad drugs. Bortezomib lost its activity during storage. The data for this drug and NSC 74859 were discarded from further analysis.

badrugs = c("D_008", "D_025") 
lpdCLL <- lpdCLL[!fData(lpdCLL)$id %in% badrugs,]

Get drug response data

# get drug responsee data
get.drugresp <- function(lpd) {
  drugresp = t(Biobase::exprs(lpd[fData(lpd)$type == 'viab'])) %>%
    tbl_df %>% dplyr::select(-ends_with(":5")) %>%
    dplyr::mutate(ID = colnames(lpd)) %>%
    tidyr::gather(drugconc, viab, -ID) %>%
    dplyr::mutate(drug = drugs[substring(drugconc, 1, 5), "name"],
           conc = sub("^D_([0-9]+_)", "", drugconc)) %>%
    dplyr::mutate(conc = as.integer(gsub("D_CHK_", "", conc)))
  
  drugresp
}
drugresp <- get.drugresp(lpdCLL)

Use median polish to summarise drug response of the five concentrations

get.medp <- function(drugresp) {
  tab = drugresp %>% group_by(drug, conc) %>% 
    do(data.frame(v = .$viab, ID = .$ID)) %>% spread(ID, v)
  
  med.p = lapply(unique(tab$drug), function(n) {
    tb = filter(tab, drug == n) %>% ungroup() %>% select(-(drug:conc)) %>% 
      as.matrix %>% `rownames<-`(1:5)
    mdp = stats::medpolish(tb, trace.iter = FALSE)
    df = as.data.frame(mdp$col) + mdp$overall
    colnames(df) <- n
    df
  }) %>% do.call(cbind,.)
  
  medp.viab = tbl_df(med.p) %>% mutate(ID = rownames(med.p)) %>%
    gather(drug, viab, -ID) 
  medp.viab
}
drugresp.mp <- get.medp(drugresp)

5.2 Association test between drug response and seahorse measurements

5.2.1 Function to calculate correlation given a drug table and seahorse table

corTest <- function(patID, viab, seaTable, ighv = NULL, pretreat = NULL) {
  viab <- setNames(viab, patID)
  
  corTab <- lapply(seq(1,nrow(seaTable)), function(i) {
    seaName <- rownames(seaTable)[i]
    
    #remove NA samples in Seahorse entry
    seaVal <- seaTable[seaName,]
    seaVal <- seaVal[!is.na(seaVal)]
    
    
    #get useable sample list
    if (!is.null(ighv)) {
      patList <- intersect(names(ighv), intersect(patID, names(seaVal)))
      ighvVal <- ighv[patList]
    } else {
      patList <- intersect(patID, names(seaVal))
    }
    
    if (!is.null(pretreat)) {
      patList <- intersect(names(pretreat), patList)
      pretreat <- pretreat[patList]
    }
    
    #subset drug value
    drugVal <- viab[patList]
    seaVal <- seaVal[patList]
    
    #correlation test, block for IGHV
    if (!is.null(ighv)) {
      res <- summary(lm(seaVal ~ drugVal + ighvVal))
      data.frame(seahorse = seaName, 
               p = res$coefficients[2,4], 
               coef =  sqrt(res$r.squared) * sign(res$coefficients[2,3]),
               stringsAsFactors = FALSE)
    } else if (!is.null(pretreat)) {
      res <- summary(lm(seaVal ~ drugVal + pretreat))
      data.frame(seahorse = seaName, 
               p = res$coefficients[2,4], 
               coef =  sqrt(res$r.squared) * sign(res$coefficients[2,3]),
               stringsAsFactors = FALSE)
    } else {
      res <- cor.test(seaVal, drugVal, method = "pearson")
      data.frame(seahorse = seaName, 
         p = res$p.value, 
         coef =  res$estimate[[1]],
         stringsAsFactors = FALSE)
      
    }
  }) %>% do.call(rbind,.)
}

5.2.2 Correlation test without blocking for IGHV

Calculate correlation coefficient and p values

seaTest <- assays(seaSub)$seaMedian
resTab.noBlock <- group_by(drugresp.mp, drug) %>% do(corTest(.$ID, .$viab, seaTest, ighv = NULL)) %>% ungroup() %>%
  mutate(p.adj = p.adjust(p, method = "BH"))

How many significant associations at 10% FDR?

resTab.noBlock %>% filter(p.adj <= 0.1) %>% nrow()
## [1] 118

How many drugs show at least one significant assocations?

resTab.noBlock %>% filter(p.adj <= 0.1) %>% filter(!duplicated(drug)) %>% nrow()
## [1] 32

5.2.3 Correlation test with blocking for IGHV

Calculate correlation coefficient and p values

#get IGHV stauts
ighv <- Biobase::exprs(lpdAll)["IGHV Uppsala U/M",]
ighv <-ighv[!is.na(ighv)]


resTab <- group_by(drugresp.mp, drug) %>% do(corTest(.$ID, .$viab, seaTest, ighv = ighv)) %>% ungroup() %>%
  mutate(p.adj = p.adjust(p, method = "BH"))

How many significant associations at 10% FDR?

resTab %>% filter(p.adj <= 0.1) %>% nrow()
## [1] 45

How many drugs show at least one significant assocations?

resTab %>% filter(p.adj <= 0.1) %>% filter(!duplicated(drug)) %>% nrow()
## [1] 19

5.3 Correlation test with blocking for pretreatment status

Calculate correlation coefficient and p values

#get IGHV stauts
ighv <- Biobase::exprs(lpdAll)["IGHV Uppsala U/M",]
ighv <-ighv[!is.na(ighv)]

#get pretreatment status
data("pretreat")
pretreat <- structure(pretreat$pretreat, names = rownames(pretreat))

resTab.pretreat <- group_by(drugresp.mp, drug) %>% do(corTest(.$ID, .$viab, seaTest, ighv = NULL, pretreat = pretreat)) %>% ungroup() %>%
  mutate(p.adj = p.adjust(p, method = "BH"))

How many significant associations at 10% FDR?

resTab.pretreat %>% filter(p.adj <= 0.1) %>% nrow()
## [1] 100

How many drugs show at least one significant assocations?

resTab.pretreat %>% filter(p.adj <= 0.1) %>% filter(!duplicated(drug)) %>% nrow()
## [1] 30

Compare p values

compareTab <- left_join(resTab.noBlock, resTab.pretreat, by = c("drug","seahorse")) %>%
    mutate(sigGroup = ifelse( p.adj.x > 0.05 & p.adj.y > 0.05, "Below 5% FDR in both models",
                            ifelse(p.adj.x <= 0.05 & p.adj.y > 0.05, "Significant without pretreatment in the model", 
                            ifelse(p.adj.x > 0.05 & p.adj.y <= 0.05, "Significant with pretreatment accounted",
                                                                                                                                   "Significant in both models"))))

colorList <- c(`Below 5% FDR in both models` = "grey70",
               `Significant in both models` = "#E41A1C",
               `Significant without pretreatment in the model` = "#377EB8",
               `Significant with pretreatment accounted` = "#984EA3")

ggplot(compareTab, aes(x=-log10(p.x), y = -log10(p.y), color = sigGroup)) + 
  geom_point() +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dotted") +
  theme_bw() + ylab(expression('-log'[10]*'P, accounting for pretreatment')) +
  xlab(expression('-log'[10]*'P, pretreatment not considered')) + 
  ggtitle("Bioenergetic features ~ drug responses") +
  scale_color_manual(values = colorList, name = "Statistical significance") +
  theme(plot.title = element_text(face = "bold", hjust =0.5,size=15), 
        legend.position = c(0.65,0.15),
        legend.background = element_rect(colour = "black"),
        legend.text = element_text(size =8),
        legend.title = element_text(size = 10),
        axis.text = element_text(size =13),
        axis.title = element_text(size =14)) 

Create a table showing the associations that are significant only without pretreatment in the model

tabOut <- filter(compareTab, sigGroup == "Significant without pretreatment in the model") %>% 
  arrange(drug) %>% mutate(seahorse = formatSea(seahorse)) %>% select(-sigGroup,-coef.x, -coef.y) %>%
  rename(`Drug name` = drug, `Seahorse measurement` = seahorse,
         `p value (pretreatment not considered)` = p.x,
         `adjusted p value (pretreatment not considered)` = p.adj.x,
         `p value (accounted for pretreatment)` = p.y,
         `adjusted p value (accounted for pretreatment)` = p.adj.y)

write(print(xtable(tabOut, digits = 3, 
             caption = "Associations between bioenergetic features and drug responses that are only significant with pretreatment in the model"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = paste0(plotDir,"seahorseVSdrug_onlyWithoutPretreat.tex"))
## % latex table generated in R 3.5.0 by xtable 1.8-3 package
## % Fri Feb  8 17:18:51 2019
## \begin{table}[ht]
## \centering
## \caption{Associations between bioenergetic features and drug responses that are only significant with pretreatment in the model} 
## \begin{tabular}{llrrrr}
##   \hline
## Drug name & Seahorse measurement & p value (pretreatment not considered) & adjusted p value (pretreatment not considered) & p value (accounted for pretreatment) & adjusted p value (accounted for pretreatment) \\ 
##   \hline
## AT13387 & maximal respiration & 0.003 & 0.033 & 0.010 & 0.080 \\ 
##   AT13387 & OCR & 0.002 & 0.021 & 0.006 & 0.060 \\ 
##   AZD7762 & OCR & 0.003 & 0.031 & 0.008 & 0.070 \\ 
##   dasatinib & spare respiratory capacity & 0.002 & 0.027 & 0.007 & 0.066 \\ 
##   encorafenib & glycolytic capacity & 0.001 & 0.017 & 0.004 & 0.050 \\ 
##   everolimus & glycolytic reserve & 0.004 & 0.037 & 0.005 & 0.055 \\ 
##   fludarabine & glycolysis & 0.004 & 0.037 & 0.007 & 0.067 \\ 
##   ibrutinib & spare respiratory capacity & 0.002 & 0.027 & 0.005 & 0.055 \\ 
##   ibrutinib & OCR & 0.003 & 0.034 & 0.008 & 0.073 \\ 
##   idelalisib & basal respiration & 0.005 & 0.042 & 0.005 & 0.055 \\ 
##   MIS-43 & ECAR/OCR & 0.005 & 0.045 & 0.008 & 0.074 \\ 
##   MK-1775 & maximal respiration & 0.004 & 0.034 & 0.010 & 0.080 \\ 
##   MK-2206 & maximal respiration & 0.003 & 0.031 & 0.006 & 0.060 \\ 
##   PF 477736 & glycolytic capacity & 0.001 & 0.014 & 0.007 & 0.068 \\ 
##   PF 477736 & glycolytic reserve & 0.002 & 0.027 & 0.013 & 0.088 \\ 
##   PF 477736 & OCR & 0.003 & 0.031 & 0.011 & 0.084 \\ 
##   tipifarnib & maximal respiration & 0.003 & 0.031 & 0.005 & 0.054 \\ 
##    \hline
## \end{tabular}
## \end{table}

5.4 P-value scatter plot

5.4.1 Correlation test with blocking for IGHV status (Supplementary Figure)

Preocess table for plotting

atLeastOne <- group_by(resTab, drug) %>% summarise(sigNum = sum(p.adj <= 0.1)) %>% filter(sigNum > 0)
plotTab <- filter(resTab, drug %in% atLeastOne$drug) %>% 
  mutate(seahorse = ifelse(p.adj > 0.1, "not significant", seahorse))

#change mearement name 
plotTab$seahorse <- sapply(plotTab$seahorse, function(x) {gsub("\\."," ",x)})

#define the group of seahorse measurement
measureType <- tibble(measure = rownames(seaCombat), type = rowData(seaCombat)$type) %>% 
  mutate(type = ifelse(type %in% c("ECAR.OCR","GST","ECAR"), "glycolysis", "respiration"))

#generate color list separately for each group
glyList <- setNames(tail(brewer.pal(9,"OrRd"),nrow(filter(measureType, type =="glycolysis"))),
                    filter(measureType, type =="glycolysis")$measure)
resList <- setNames(tail(brewer.pal(9,"GnBu"),nrow(filter(measureType, type =="respiration"))),
                    filter(measureType, type =="respiration")$measure)
nosig <- c("not significant" = "grey80")
colList <- c(glyList, resList, nosig)
names(colList) <- sapply(names(colList), function(x) {gsub("\\."," ",x)})

#order the factor for seahorse measurment
plotTab$seahorse <- factor(plotTab$seahorse, levels = names(colList))

#add the direction of correlation
plotTab <- mutate(plotTab, Direction = ifelse(coef > 0, "positive", "negative"))

#get the cutoff value
fdrCut <- max(filter(plotTab, seahorse != "not significant")$p)

Plot

p <- ggplot(data = plotTab, aes(x = drug,y=-log10(p), fill = seahorse, color = seahorse, shape = Direction)) + 
  geom_point(size=4) + scale_fill_manual(values = colList) + scale_color_manual(values = colList) +
  geom_hline(yintercept = -log10(fdrCut), linetype="dotted") + 
  ylab(expression(-log[10]*'('*p~value*')')) + xlab("") + theme_bw() + 
  scale_shape_manual(values = c(positive = 24, negative = 25)) +
  theme(axis.text.x = element_text(angle = 90, vjust=0.5, hjust = 1,size=15),
        axis.text.y = element_text(size=15),
        axis.title = element_text(size =15, face = "bold")) +
  guides(fill="none", color = guide_legend(title = "Measurement"))

plot(p)

5.4.2 Correlation test without blocking for IGHV status ( Figure 4)

Preocess table for plotting

atLeastOne <- group_by(resTab.noBlock, drug) %>% summarise(sigNum = sum(p.adj <= 0.1)) %>% filter(sigNum > 0)
plotTab <- filter(resTab.noBlock, drug %in% atLeastOne$drug) %>% 
  mutate(seahorse = ifelse(p.adj > 0.1, "not significant", seahorse))

#change mearement name 
plotTab$seahorse <- sapply(plotTab$seahorse, function(x) {gsub("\\."," ",x)})

#define the group of seahorse measurement
measureType <- tibble(measure = rownames(seaCombat), type = rowData(seaCombat)$type) %>% 
  mutate(type = ifelse(type %in% c("ECAR.OCR","GST","ECAR"), "glycolysis", "respiration"))

#generate color list separately for each group
glyList <- setNames(tail(brewer.pal(9,"OrRd"),nrow(filter(measureType, type =="glycolysis"))),
                    filter(measureType, type =="glycolysis")$measure)
resList <- setNames(tail(brewer.pal(9,"GnBu"),nrow(filter(measureType, type =="respiration"))),
                    filter(measureType, type =="respiration")$measure)

nosig <- c("not significant" = "grey80")
colList <- c(glyList, resList, nosig)
names(colList) <- sapply(names(colList), function(x) {gsub("\\."," ",x)})

#order the factor for seahorse measurment
plotTab$seahorse <- factor(plotTab$seahorse, levels = names(colList))

#add direction iformation
plotTab <- mutate(plotTab, Direction = ifelse(coef > 0, "positive", "negative"))

#get the cutoff value
fdrCut <- max(filter(plotTab, seahorse != "not significant")$p)
drugManhattan <- ggplot(data = plotTab, aes(x = drug,y=-log10(p), fill = seahorse, color = seahorse, shape = Direction)) + 
  geom_point(size=4) + scale_fill_manual(values = colList) + scale_color_manual(values = colList) +
  geom_hline(yintercept = -log10(fdrCut), linetype="dotted") + 
  ylab(expression(-log[10]*'('*p~value*')')) + xlab("") + theme_bw() + 
  scale_shape_manual(values = c(positive = 24, negative = 25)) +
  theme(axis.text.x = element_text(angle = 90, vjust=0.5, hjust = 1,size=15),
        axis.text.y = element_text(size=15),
        axis.title = element_text(size =15, face = "bold"),
        plot.title = element_text(size=20, face = "bold")) +
  guides(fill="none", color = guide_legend(title = "Measurement")) 


plot(drugManhattan)

5.4.3 Correlation test with blocking for pretreatment status (Supplementary Figure)

Preocess table for plotting

atLeastOne <- group_by(resTab.pretreat, drug) %>% summarise(sigNum = sum(p.adj <= 0.1)) %>% filter(sigNum > 0)
plotTab <- filter(resTab.pretreat, drug %in% atLeastOne$drug) %>% 
  mutate(seahorse = ifelse(p.adj > 0.1, "not significant", seahorse))

#change mearement name 
plotTab$seahorse <- sapply(plotTab$seahorse, function(x) {gsub("\\."," ",x)})

#define the group of seahorse measurement
measureType <- tibble(measure = rownames(seaCombat), type = rowData(seaCombat)$type) %>% 
  mutate(type = ifelse(type %in% c("ECAR.OCR","GST","ECAR"), "glycolysis", "respiration"))

#generate color list separately for each group
glyList <- setNames(tail(brewer.pal(9,"OrRd"),nrow(filter(measureType, type =="glycolysis"))),
                    filter(measureType, type =="glycolysis")$measure)
resList <- setNames(tail(brewer.pal(9,"GnBu"),nrow(filter(measureType, type =="respiration"))),
                    filter(measureType, type =="respiration")$measure)
nosig <- c("not significant" = "grey80")
colList <- c(glyList, resList, nosig)
names(colList) <- sapply(names(colList), function(x) {gsub("\\."," ",x)})

#order the factor for seahorse measurment
plotTab$seahorse <- factor(plotTab$seahorse, levels = names(colList))

#add the direction of correlation
plotTab <- mutate(plotTab, Direction = ifelse(coef > 0, "positive", "negative"))

#get the cutoff value
fdrCut <- max(filter(plotTab, seahorse != "not significant")$p)

Plot

p <- ggplot(data = plotTab, aes(x = drug,y=-log10(p), fill = seahorse, color = seahorse, shape = Direction)) + 
  geom_point(size=4) + scale_fill_manual(values = colList) + scale_color_manual(values = colList) +
  geom_hline(yintercept = -log10(fdrCut), linetype="dotted") + 
  ylab(expression(-log[10]*'('*p~value*')')) + xlab("") + theme_bw() + 
  scale_shape_manual(values = c(positive = 24, negative = 25)) +
  theme(axis.text.x = element_text(angle = 90, vjust=0.5, hjust = 1,size=15),
        axis.text.y = element_text(size=15),
        axis.title = element_text(size =15, face = "bold")) +
  guides(fill="none", color = guide_legend(title = "Measurement"))

plot(p)

5.5 Scatter plot of associations

Scatter plot for all significant pairs

resTab.sig <- filter(resTab, p.adj <= 0.1)

scatterList <- lapply(seq(1,nrow(resTab.sig)), function(i){
  seaName <- resTab.sig[i,]$seahorse
  p <- resTab.sig[i,]$p
  coef <- format(resTab.sig[i,]$coef,digits = 2)
  drugName <- resTab.sig[i,]$drug
  
  #remove NA samples in Seahorse entry
  seaVal <- seaTest[seaName,]
  seaVal <- seaVal[!is.na(seaVal)]
  
  #get useable sample list
  patList <- intersect(filter(drugresp.mp, drug == drugName)$ID, names(seaVal))
  
  #set y label
  if (seaName %in% c("ECAR.OCR.ration")) {
    xLab = "ECAR/OCR"
  } else if (seaName %in% c("maximal.respiration","spare.respiratory.capacity","basal.respiration",
                            "ATP.production")) {
    xLab = "OCR (pMol/min)" } else xLab = "ECAR (pMol/min)"
  
  #format seahorse measurement name
  seaName.new <- ifelse(seaName == "ECAR.OCR.ratio", "ECAR/OCR", gsub("\\."," ",seaName))
  
  #prepare title
  plotTitle <- sprintf("%s ~ %s", drugName, seaName.new)
  
  #prepare plot table
  plotTab <- filter(drugresp.mp, drug == drugName, ID %in% patList) %>%
    mutate(sea=seaVal[ID])
  
  #prepare correlation test annotations
  annoText <- paste("'coefficient ='~",coef,"*","', p ='~",sciPretty(p,digits=2))
  limX <- max(plotTab$sea) + 2
  midX <- max(plotTab$sea)/2
  ggplot(plotTab, aes(x=sea,y=100*viab)) + geom_point(size=1) + 
    geom_smooth(method = "lmrob", se= FALSE) +
    xlab(xLab) + ylab("% viability after drug treatment") +
    theme_bw() + ggtitle(plotTitle) + coord_cartesian(xlim = c(-2,limX)) +
    annotate("text", x = midX, y = Inf, label = annoText, 
             vjust=1, hjust=0.5, size = 5, parse = TRUE, col= "darkred") +
    theme(panel.grid = element_blank(),
          axis.title = element_text(size=13, face="bold"),
          axis.text = element_text(size=12),
          plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
          legend.position = "none",
          axis.title.x = element_text(face="bold"),
          plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))
})

names(scatterList) <- paste0(resTab.sig$seahorse, "_", resTab.sig$drug)

A figure of selected drugs (Supplementary Figure)

plot_grid(scatterList$glycolysis_rotenone, 
          scatterList$glycolysis_venetoclax,
          scatterList$glycolytic.capacity_orlistat,
          scatterList$`ECAR_KX2-391`,ncol=2)

5.6 Association test for individual concentrations

Association tests for each concentration

corRes_conc <- group_by(drugresp, drug, conc) %>% do(corTest(.$ID, .$viab, seaTest, ighv = NULL)) %>% ungroup() %>%
  mutate(p.adj = p.adjust(p, method = "BH"))

Prepare plot tab

drugList <- unique(filter(corRes_conc, p.adj < 0.1)$drug)
seaList <- unique(filter(corRes_conc, p.adj < 0.1)$seahorse)

plotTab <- filter(corRes_conc, drug %in% drugList,
                  seahorse %in% seaList) %>% mutate(concIndex = paste0("c",conc)) %>%
  mutate(coef = ifelse(p.adj < 0.1, coef, 0),
         seahorse = ifelse(seahorse != "ECAR.OCR.ratio", seahorse,
                              "ECAR/OCR")) %>%
  mutate(seahorse = gsub("[.]","\n",seahorse)) %>%
  mutate(seahorse = factor(seahorse))

#plot similar seahorse measurment together
plotTab$seahorse <- factor(plotTab$seahorse, 
                              levels = levels(plotTab$seahorse)[c(3,4,5,6,7,
                                                                     9,1,2,8,11,10)])

Heatmap plot for p values (Supplementary Figure)

ggplot(plotTab, aes(x=concIndex, y = drug, fill = coef)) + geom_tile(size = 0.3, color = "white") + facet_wrap(~ seahorse, nrow = 1) + 
  scale_fill_gradient2(low = "red", mid = "white", high = "blue", midpoint = 0,
                       limits =c(-0.6,0.6), labels = seq(-0.8,0.8, by = 0.2),
                       breaks = seq(-0.8,0.8, by = 0.2),
                       name = "Coefficient") +
  theme_bw() + theme(strip.text = element_text(face = "bold"),
                     axis.text.y = element_text(size =12)) +
  ylab("Drug name") + xlab("Concentration Index")

6 Multi-variate models for predicting drug responses

6.0.1 Data pre-processing

For genetic data

#mutations and copy number variations
mutCOMbinary<-channel(mutCOM, "binary")
mutCOMbinary<-mutCOMbinary[featureNames(mutCOMbinary) %in% colnames(seaCombat),]
genData<-Biobase::exprs(mutCOMbinary)
idx <- which(colnames(genData) %in% c("del13q14_bi", "del13q14_mono"))
genData <- genData[,-idx]
colnames(genData)[which(colnames(genData)=="del13q14_any")] = "del13q14"
genData <- data.frame(genData)

#add IGHV
translation <- c(`U` = 0, `M` = 1)
stopifnot(all(patmeta$IGHV %in% c("U","M", NA)))
IGHVData <- data.frame(row.names = rownames(patmeta), translation[patmeta$IGHV])
genData$IGHV <- IGHVData[rownames(genData),]

#add methylation
# Methylation cluster
translation <- c(`HP` = 2, `IP` = 1, `LP` = 0)
Mcluster <- data.frame(row.names =rownames(patmeta), translation[patmeta$ConsClust])
genData$Methylation_Cluster <- Mcluster[rownames(genData),]           

genData <- as.matrix(genData)

genData <- genData[,colSums(!is.na(genData)) >= 10 & colSums(genData,na.rm = TRUE) >= 5]

#fill the missing value with majority
genData <- apply(genData, 2, function(x) {
  xVec <- x
  popVal <- names(which.max(table(x)))
  xVec[is.na(xVec)] <- as.integer(popVal)
  xVec
})

For drug response data

viabData <- spread(drugresp.mp, key = "ID", value = "viab") %>%
  data.frame() %>% column_to_rownames("drug") %>%
  as.matrix() %>% t()

Prepare seahorse meaurement

sea <- t(assays(seaCombat)$seaMedian)

#use complete cases and subset
sea <- sea[complete.cases(sea),]

Function to Generate the explanatory dataset for each drug responses

#function to generate response vector and explainatory variable for each seahorse measurement

generateData.drug <- function(inclSet, onlyCombine = FALSE, censor = NULL, robust = FALSE) {
    
    dataScale <- function(x, censor = NULL, robust = FALSE) {
        #function to scale different variables
        if (length(unique(na.omit(x))) == 2){
          #a binary variable, change to -0.5 and 0.5 for 1 and 2
          x - 0.5
        } else if (length(unique(na.omit(x))) == 3) {
          #catagorical varialbe with 3 levels, methylation_cluster, change to -0.5,0,0.5
          (x - 1)/2
        } else {
          if (robust) {
          #continuous variable, centered by median and divied by 2*mad
          mScore <- (x-median(x,na.rm=TRUE))/(1.4826*mad(x,na.rm=TRUE))
            if (!is.null(censor)) {
              mScore[mScore > censor] <- censor
              mScore[mScore < -censor] <- -censor
            }
          mScore/2
          } else {
            mScore <- (x-mean(x,na.rm=TRUE))/(sd(x,na.rm=TRUE))
              if (!is.null(censor)) {
                mScore[mScore > censor] <- censor
                mScore[mScore < -censor] <- -censor
              }
          mScore/2
          }
        }
      }
    
    
    
    allResponse <- list()
    allExplain <- list()

    for (name in colnames(inclSet$drugs)) {
      y <- inclSet$drugs[,name]
      y <- y[!is.na(y)]
      
      #get overlapped samples for each dataset 
      overSample <- names(y)
      
      for (eachSet in inclSet) {
        overSample <- intersect(overSample,rownames(eachSet))
      }
      
      y <- dataScale(y[overSample], censor = censor, robust = robust)
      allResponse[[name]] <- y
    }
      
  #generate explainatory variable
     if ("seahorse" %in% names(inclSet)) {
      seaTab <- inclSet$seahorse[overSample,]
      vecName <- sprintf("metabolism(%s)",ncol(seaTab))
      allExplain[[vecName]] <- apply(seaTab,2,dataScale,censor = censor, robust = robust)
    }
    
    
     if ("gen" %in% names(inclSet)) {
      geneTab <- inclSet$gen[overSample,]
      vecName <- sprintf("genetic(%s)", ncol(geneTab))
      allExplain[[vecName]] <- apply(geneTab,2,dataScale)
    }
    
   
    comboTab <- c()
    for (eachSet in names(allExplain)){
      comboTab <- cbind(comboTab, allExplain[[eachSet]])
    }
    
    vecName <- sprintf("all(%s)", ncol(comboTab))
    allExplain[[vecName]] <- comboTab
    
  return(list(allResponse=allResponse, allExplain=allExplain))

}

6.0.2 Calulate drug responses explained by multi-omics data set

6.0.2.1 Training models

Clean and integrate multi-omics data

inclSet<-list(gen=genData, drugs=viabData, seahorse = sea)
cleanData <- generateData.drug(inclSet, censor = 4)

Function for multi-variate regression without penalization (lm version)

runLM <- function(X, y) {
  res <- summary(lm(y~ 0 + X))
  
  coefTab <- res$coefficients[,c(1,4)]
  rownames(coefTab) <- gsub("X","", rownames(coefTab))
  colnames(coefTab) <- c("coef","p")
  R2 <- res$adj.r.squared
  

  
  list(model = res,  varExplain = R2, coef = coefTab)
}

Perform regression

lmResults <- list()
for (eachMeasure in names(cleanData$allResponse)) {
  dataResult <- list()
  for (eachDataset in names(cleanData$allExplain)) {
    y <- cleanData$allResponse[[eachMeasure]]
    X <- cleanData$allExplain[[eachDataset]]

    glmRes <- runLM(X, y)
    dataResult[[eachDataset]] <- glmRes 
  }
  lmResults[[eachMeasure]] <- dataResult
  
}

Plot the comparison of R2

compareR2 <- lapply(names(lmResults), function(name) {
  tibble(drug = name, 
         genetics = lmResults[[name]]$`genetic(20)`$varExplain,
         metabolism = lmResults[[name]]$`metabolism(11)`$varExplain,
         both = lmResults[[name]]$`all(31)`$varExplain)
}) %>% bind_rows() %>% mutate(diff = both - genetics) %>% arrange(desc(diff))
plotTab <- compareR2 %>% 
  mutate(colLab = ifelse(diff > 0.1, "darkred",
                         ifelse(genetics > 0.4,"darkblue","black"))) %>%
  mutate(labText = ifelse(colLab != "black", drug,""))
plotR2 <- ggplot(plotTab, aes(x=genetics, y = both, label = labText)) + 
  geom_point(aes(col = colLab),size=2) + 
  scale_color_manual(values = c(darkred = "firebrick",darkblue="darkblue",black= "black"))+
  ggrepel::geom_text_repel(size=4) + geom_abline(intercept = 0, slope = 1, color = "red", linetype ="dotted") +
  coord_cartesian(xlim=c(-0.1,0.8),ylim = c(-0.1,0.8)) + theme_bw() +
  theme(axis.text.x = element_text(size=12),
        axis.text.y = element_text(size=12),
        axis.title = element_text(size =12, face = "bold"),
        plot.title = element_text(size=16, face = "bold"),
        legend.position = "none") +
  ylab("Variance explained by bioenergetic and genetic features") +
  xlab("Variance explained by genetic features alone")

Plot selected features for each drug

plotDrugs <- filter(compareR2, diff > 0.1) %>% pull(drug)

#function to plot coefficient for selected drugs
plotCoef <- function(drugList, lmResults, maxy = 3.5) {
  rowNum <- c()
  pList <- list()
  for (name in drugList) {
    eachTab <- lmResults[[name]]$`all(31)`$coef %>% data.frame() %>%
      rownames_to_column("feature") %>% filter(p <= 0.05) %>%
      mutate(feature = formatSea(feature), logP = -log10(p)) %>%
      mutate(direction = ifelse(coef >0, "pos","neg")) %>%
      arrange(logP) %>% mutate(feature = factor(feature, levels=feature))

    
    pList[[name]] <- ggplot(eachTab, aes(x=feature, y = logP, fill = direction)) + 
      geom_bar(stat = "identity", width = 0.7) +
      coord_flip(ylim =c(0,maxy)) + 
      xlab(name) + ylab(expression(-log[10]*'('*p*')')) +
      scale_fill_manual(values = c(pos = "blue",neg = "red"), guide = FALSE) + 
      theme(axis.line.y = element_blank(),
            axis.text.x = element_text(size=10),
            axis.text.y = element_text(size=10),
            axis.title.y = element_text(size=10, face= "bold"),
            axis.title.x = element_text(size =12, face = "bold"),
            plot.title = element_text(size=20, face = "bold"),
            plot.margin = margin(0.1,0,0.1,0, unit= "cm"))
    rowNum <- c(rowNum, nrow(eachTab))
  }
  
  grobList <- lapply(pList, ggplotGrob)
  grobList <- do.call(gridExtra::gtable_rbind, 
                      c(grobList, size = "max"))
  panels <- grobList$layout$t[grep("panel", grobList$layout$name)]
  grobList$heights[panels] <- unit(rowNum, "null")
  return(grobList)       
}

seaCoefs <- plotCoef(plotDrugs, lmResults)

Plot selected features for drug with high vairance explained values

plotDrugs <- arrange(compareR2, desc(genetics)) %>% 
  filter(genetics >0.4) %>% pull(drug)

highCoefs <- plotCoef(plotDrugs, lmResults, maxy = 5)
grid.draw(highCoefs)

7 Generate a combined plot for figure 4

title = ggdraw() + draw_figure_label("Figure 4", fontface = "bold", position = "top.left",size=20)
pout <- ggdraw() +
  draw_plot(drugManhattan, 0, 0.56, 1, 0.42) +
  draw_plot(plotR2, 0, 0.05 , .5, .4) +
  draw_plot(seaCoefs, 0.55, 0 , .38, 0.53) +
  draw_plot_label(c("A", "B", "C"), 
                  c(0, 0, 0.50), c(1, 0.55, 0.55), size = 20)
plot_grid(title, pout, rel_heights = c(0.05,0.95), ncol = 1)


8 Transcriptomic analysis

8.1 Differential expression between M-CLL and U-CLL samples

8.1.1 Data pre-processing

Load data set

data("dds", "patmeta","mutCOM","seaOri")

Only use CLL samples with IGHV annotations and have been used in seahorse experiments

#annotate IGHV status
dds$IGHV <- patmeta[match(dds$PatID, rownames(patmeta)),]$IGHV
dds$diag <- patmeta[match(dds$PatID, rownames(patmeta)),]$Diagnosis

#estimate size factor
dds <- estimateSizeFactors(dds)

#only choose CLL samples with IGHV annotations and have been used in seahorse experiments
ddsCLL <- dds[,dds$diag == "CLL" & !is.na(dds$IGHV) & dds$PatID %in% colnames(seaOri)]

Filter genes

#remove genes without gene symbol annotations
ddsCLL <- ddsCLL[! rowData(ddsCLL)$symbol %in% c(NA,""),]
ddsCLL <- ddsCLL[rowData(ddsCLL)$chromosome != "Y",]

#only keep genes that have counts higher than 10 in any sample
keep <- apply(counts(ddsCLL), 1, function(x) any(x >= 10)) 
ddsCLL <- ddsCLL[keep,]

# Remove transcripts which do not show variance across samples.
sds <- rowSds(counts(ddsCLL, normalized = TRUE))
sh <- shorth(sds)
ddsCLL <- ddsCLL[sds >= sh,]

#how many genes do we have
nrow(ddsCLL)
## [1] 13744

Variance stabilizing tranformation

ddsCLL.norm <- varianceStabilizingTransformation(ddsCLL)

8.1.2 Identify differentially expressed gene between M-CLL and U-CLL using DESeq2

Differential expression using DESeq2

ddsCLL$IGHV <- factor(ddsCLL$IGHV, levels = c("U", "M"))
design(ddsCLL) <- ~ IGHV
ddsCLL <- DESeq(ddsCLL, betaPrior = TRUE)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 720 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing

Get differential expression result

DEres <- as.tibble(results(ddsCLL, tidy = TRUE)) %>% mutate(symbol = rowData(ddsCLL)$symbol)
## Warning: `as.tibble()` is deprecated, use `as_tibble()` (but mind the new semantics).
## This warning is displayed once per session.

8.2 Gene enrichment analysis

Function for converting DEseq results to enrichment analysis input

createInput <- function(DEres, pCut = 0.05, ifFDR = FALSE, rankBy = "stat") {
  if (ifFDR) {
    inputTab <- filter(DEres, padj <= pCut)
  } else {
    inputTab <- filter(DEres, pvalue <= pCut)
  }
  
  inputTab <- arrange(inputTab, pvalue) %>% filter(!duplicated(symbol)) %>% select_("symbol", rankBy) %>% data.frame(stringsAsFactors = FALSE)
  rownames(inputTab) <- inputTab$symbol
  inputTab$symbol <- NULL
  colnames(inputTab) <- "stat"
  return(inputTab)
}

load genesets

gmts = list(H=system.file("extdata","h.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"),
            C6=system.file("extdata","c6.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"),
            KEGG=system.file("extdata","c2.cp.kegg.v5.1.symbols.gmt", package = "BloodCancerMultiOmics2017"))

Enrichment analysis using Hallmarks gene set

enRes <- list()
inputTab <- createInput(DEres, pCut = 0.1, ifFDR = TRUE)
enRes[["Gene enrichment analysis"]] <- runGSEA(inputTab = inputTab, gmtFile = gmts$H, GSAmethod = "page")
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
#remove the HALLMARK_
enRes$`Gene enrichment analysis`$Name <- gsub("HALLMARK_","", enRes$`Gene enrichment analysis`$Name)

Plot hallmark result

enBar <- plotEnrichmentBar(enRes, pCut = 0.1, ifFDR = TRUE, setName = "Hallmark gene sets")
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
plot(enBar)

8.2.1 Heatmap plot of the expression values of genes in the glycolysis geneset

Prepare the data for heatmap

# load genes in the gene set
gsc <- loadGSC(gmts$H)
geneList <- gsc$gsc$HALLMARK_GLYCOLYSIS

#select differentially expressed genes
fdrCut <- 0.10
sigDE <- filter(DEres, padj <= fdrCut, log2FoldChange < 0) %>% filter(symbol %in% geneList) %>%
  arrange(log2FoldChange)

#get the expression matrix
plotMat <- assay(ddsCLL.norm[sigDE$row,])
colnames(plotMat) <- ddsCLL.norm$PatID
rownames(plotMat) <- sigDE$symbol

#sort columns of plot matrix based on trisomy12 status
plotMat <- plotMat[,order(ddsCLL$IGHV)]

#calculate z-score and sensor
plotMat <- t(scale(t(plotMat)))
plotMat[plotMat >= 4] <- 4
plotMat[plotMat <= -4] <- -4

annoCol <- data.frame(row.names = ddsCLL.norm$PatID, `IGHV` = ddsCLL.norm$IGHV)

Plot the heatmap

#color for colum annotation
annoColor <- list(IGHV = c(M = "red", U = "grey80"))

hallHeatmap <- pheatmap(plotMat, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
         cluster_cols = FALSE, cluster_rows = FALSE,
         annotation_col = annoCol, annotation_colors = annoColor,
         show_colnames = FALSE, fontsize_row = 8, breaks = seq(-5,5, length.out = 101), treeheight_row = 0,
         border_color = NA, main = "HALLMARK_GLYCOLYSIS",silent = TRUE)$gtable

grid.draw(hallHeatmap)

8.2.2 Beeswarm plot of expression values of key glycolytic genes

plotGenes <- c("PFKP","PGAM1","PGK1")
plotTab <- data.frame(assay(ddsCLL.norm[rowData(ddsCLL.norm)$symbol %in% plotGenes,]))
colnames(plotTab) <- ddsCLL.norm$PatID
plotTab$symbol <- rowData(ddsCLL.norm[rowData(ddsCLL.norm)$symbol %in% plotGenes,])$symbol
plotTab <- gather(plotTab, key = "PatID", value = "value", -symbol) %>%
  mutate(IGHV = colData(ddsCLL.norm)[match(PatID, ddsCLL.norm$PatID),]$IGHV) %>%
  mutate(p = sigDE[match(symbol, sigDE$symbol),]$pvalue)

pTab <- distinct(plotTab, symbol, p) %>% arrange(symbol)
IGHVcount <- distinct(plotTab, PatID, IGHV) %>% group_by(IGHV) %>%
  summarise(n = length(IGHV)) %>% 
  mutate(label = sprintf("%s-CLL\n(n=%s)",IGHV, n))

scaleFun <- function(x) sprintf("%.1f",x)

beePlot <- ggplot(plotTab, aes(x=IGHV, y = value)) + 
    stat_boxplot(geom = "errorbar", width = 0.3) +
    geom_boxplot(outlier.shape = NA, col="black", width=0.4) + 
    geom_beeswarm(cex=2, size =0.5, aes(col = IGHV)) + theme_classic() +
    xlab("") + ylab("normalized expression") +
    scale_color_manual(values = c("M" = "red","U" = "grey30")) + 
    scale_x_discrete(labels = structure(IGHVcount$label, names = IGHVcount$IGHV)) +
    scale_y_continuous(expand = expand_scale(add = c(0.1,0.6)),
                       labels = scaleFun) +
    theme(axis.line.x = element_blank(), axis.ticks.x = element_blank(),
           axis.title = element_text(size=12, face="bold"),
           axis.text.y = element_text(size=12),
           axis.text.x = element_text(size =10),
           plot.title = element_text(face="bold", hjust=0.5),
           legend.position = "none",
           axis.title.x = element_text(face="bold"),
          strip.background = element_blank(),
          strip.text = element_text(size=13, face = "bold")) +
  facet_wrap(~ symbol, scales = "free")

#add p value annotations
pTab$pt <- sapply(pTab$p, sciPretty)
beePlot <- beePlot + geom_text(pTab, mapping = aes(x = 1.5, y = Inf, 
                                        label = paste("p==~",bquote(.(pt))), 
                                        hjust=0.5, vjust =1), size =3, 
                    parse = T)

8.2.3 Combined plots for manuscript (Figure 3)

title = ggdraw() + draw_figure_label("Figure 3", fontface = "bold", position = "top.left",size=20)
p <- ggdraw() + 
  draw_plot(enBar, 0, 0.5, 0.5, 0.45) + 
  draw_plot(hallHeatmap, 0.5, 0, 0.5, 0.95) +
  draw_plot(beePlot, 0, 0, 0.5, 0.4) +
  draw_plot_label(c("A","B","C"), c(0, 0.5, 0), c(1, 1, 0.45), size=20)
plot_grid(title, p, rel_heights = c(0.05,0.95), ncol = 1)

9 Query public datasets for gene expression signatures in CLL of B-cell receptor triggering

9.1 Expression signature of B-cell receptor triggered by IgM (GSE49695)

9.1.1 Data import

Get public dataset from Gene Expression Omnibus (GEO)

gse <- getGEO("GSE52774", GSEMatrix = TRUE)
gse <- gse[[1]]

Define treatment status

table(gse$`treatment:ch1`)
## 
## co-stimulated with immobilized anti-IgM 
##                                      16 
##                               untreated 
##                                      16
gse$treatment <- factor(ifelse(gse$`treatment:ch1` == "untreated", "untreated","IgM"),
                        levels = c("untreated","IgM"))

9.1.2 Quality assessment

Distribution of raw expression values

boxplot(exprs(gse))

Variance stablizing transformation

gse.vst <- gse
exprs(gse.vst) <- normalizeVSN(gse)

Remove genes without gene symbol

gse.vst <- gse.vst[fData(gse.vst)$`GENE_SYMBOL`!="",]

Remove invariant genes

sds <- rowSds(exprs(gse.vst))
gse.vst <- gse.vst[sds > shorth(sds),]

PCA

exprMat <- exprs(gse.vst)
#using top 5000 variant genes
sds <- rowSds(exprMat)
exprMat <- exprMat[order(sds, decreasing = TRUE) <= 5000,]

pcRes <- prcomp(t(exprMat), center = TRUE, scale. = FALSE)
plotTab <- pcRes$x[,c(1,2)] %>% data.frame() %>% rownames_to_column("sampleID") %>%
  mutate(treatment = gse[,sampleID]$treatment)

ggplot(plotTab, aes(x=PC1, y = PC2, color = treatment)) + geom_point() + 
  theme_bw()

Most treated and untreated samples can be clearly separated.

9.2 Differential expression

Design matrix

mm <- model.matrix( ~  treatment , pData(gse.vst) )

Run Limma

fit <- lmFit(gse.vst, mm)
fit <- eBayes(fit)
resTab <- topTable(fit, coef= "treatmentIgM", number = "all")

P value histogram

hist(resTab$P.Value, breaks = 50, main = "p value histogram", xlab = "pvalues")

9.3 Gene enrichment analysis

Run enrichment analysis using PAGE

inputTab <- dplyr::filter(resTab, adj.P.Val < 0.1) %>%
  dplyr::select(t, GENE_SYMBOL) %>%
  arrange(desc(abs(t))) %>% distinct(GENE_SYMBOL, .keep_all = TRUE) %>%
  data.frame() %>% column_to_rownames("GENE_SYMBOL")

enRes <- list()

gmts = list(H=system.file("extdata","h.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"),
            C6=system.file("extdata","c6.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"),
            KEGG=system.file("extdata","c2.cp.kegg.v5.1.symbols.gmt", package = "BloodCancerMultiOmics2017"))

enRes[["BCR stimulated by IgM (GSE49695)"]] <- runGSEA(inputTab, 
                               gmtFile = gmts$H,
                               GSAmethod = "page")
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!

Plot enrichment p values (only pathways passing 1% FDR are shown)

enBar1 <- seahorseCLL::plotEnrichmentBar(enRes,pCut = 0.01, ifFDR = TRUE,
                                         setName = "Hallmark gene sets")
grid.draw(enBar1)

Heatmap of genes up-regulated in glycolysis pathway

# load genes in the gene set
gsc <- loadGSC(gmts$H)
geneList <- gsc$gsc$HALLMARK_GLYCOLYSIS

#select differentially expressed genes
fdrCut <- 0.10
sigDE <- filter(resTab, adj.P.Val < fdrCut, logFC > 0) %>% 
  filter(GENE_SYMBOL %in% geneList) %>%
  arrange(desc(logFC)) %>% distinct(GENE_SYMBOL, .keep_all = TRUE)

#get the expression matrix
plotMat <- exprs(gse.vst[sigDE$ID,])
rownames(plotMat) <- sigDE$GENE_SYMBOL

#sort columns of plot matrix based on treatment status
plotMat <- plotMat[,order(gse.vst$treatment)]

annoCol <- data.frame(row.names = colnames(gse.vst), `treatment` = gse.vst$treatment)

Plot the heatmap

#color for colum annotation
annoColor <- list(treatment = c(IgM = "red", untreated = "grey80"))

hallHeatmap1 <- pheatmap(plotMat, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100), scale = "row",
         cluster_cols = FALSE, cluster_rows = FALSE,
         annotation_col = annoCol, annotation_colors = annoColor,
         show_colnames = FALSE, fontsize_row = 8, breaks = seq(-5,5, length.out = 101), treeheight_row = 0,
         border_color = NA, main = "HALLMARK_GLYCOLYSIS (GSE49695)",silent = TRUE)$gtable

grid.draw(hallHeatmap1)

9.4 Gene expression signature in CLL of B-cell receptor triggering by CPG (GSE30105)

9.4.1 Data import

Get public dataset from Gene Expression Omnibus (GEO)

gse <- getGEO("GSE30105", GSEMatrix = TRUE)
gse <- gse[[1]]

Define treatment status

table(gse$title)
## 
## CPG-stimulated B-CLL repl 1 CPG-stimulated B-CLL repl 2 
##                           1                           1 
## CPG-stimulated B-CLL repl 3 CPG-stimulated B-CLL repl 4 
##                           1                           1 
## CPG-stimulated B-CLL repl 5 CPG-stimulated B-CLL repl 6 
##                           1                           1 
## CPG-stimulated B-CLL repl 7 CPG-stimulated B-CLL repl 8 
##                           1                           1 
## CPG-stimulated B-CLL repl 9   Unstimulated B-CLL repl 1 
##                           1                           1 
##   Unstimulated B-CLL repl 2   Unstimulated B-CLL repl 3 
##                           1                           1 
##   Unstimulated B-CLL repl 4   Unstimulated B-CLL repl 5 
##                           1                           1 
##   Unstimulated B-CLL repl 6   Unstimulated B-CLL repl 7 
##                           1                           1 
##   Unstimulated B-CLL repl 8   Unstimulated B-CLL repl 9 
##                           1                           1
gse$treatment <- factor(ifelse(grepl("Unstimulated",gse$title), "untreated","CPG"),
                        levels = c("untreated","CPG"))

9.5 Quality assessment

Distribution of raw expression values

boxplot(exprs(gse))

Variance stablizing transformation

gse.vst <- gse
exprs(gse.vst) <- normalizeVSN(gse)

Remove genes without gene symbol

gse.vst <- gse.vst[fData(gse.vst)$`GENE_SYMBOL`!="",]

Remove invariant genes

sds <- rowSds(exprs(gse.vst))
gse.vst <- gse.vst[sds > shorth(sds),]

PCA

exprMat <- exprs(gse.vst)
#using top 5000 variant genes
sds <- rowSds(exprMat)
exprMat <- exprMat[order(sds, decreasing = TRUE) <= 5000,]
pcRes <- prcomp(t(exprMat), center = TRUE, scale. = FALSE)
plotTab <- pcRes$x[,c(1,2)] %>% data.frame() %>% rownames_to_column("sampleID") %>%
  mutate(treatment = gse[,sampleID]$treatment)

ggplot(plotTab, aes(x=PC1, y = PC2, color = treatment)) + geom_point() + 
  theme_bw()

Most treated and untreated samples can be clearly separated.

9.6 Differential expression

Design matrix

mm <- model.matrix( ~  treatment , pData(gse.vst) )

Run Limma

fit <- lmFit(gse.vst, mm)
fit <- eBayes(fit)
resTab <- topTable(fit, coef= "treatmentCPG", number = "all")

P value histogram

hist(resTab$P.Value, breaks = 50, main = "p value histogram", xlab = "pvalues")

9.7 Gene enrichment analysis

Run enrichment analysis using PAGE

inputTab <- dplyr::filter(resTab, adj.P.Val < 0.1) %>%
  dplyr::select(t, GENE_SYMBOL) %>%
  arrange(desc(abs(t))) %>% distinct(GENE_SYMBOL, .keep_all = TRUE) %>%
  data.frame() %>% column_to_rownames("GENE_SYMBOL")

enRes <- list()

gmts = list(H=system.file("extdata","h.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"),
            C6=system.file("extdata","c6.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"),
            KEGG=system.file("extdata","c2.cp.kegg.v5.1.symbols.gmt", package = "BloodCancerMultiOmics2017"))

enRes[["BCR stimulated by CPG (GSE30105)"]] <- runGSEA(inputTab, 
                               gmtFile = gmts$H,
                               GSAmethod = "page")
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!

Plot enrichment p values (only pathways passing 1% FDR are shown)

enBar2 <- seahorseCLL::plotEnrichmentBar(enRes,pCut = 0.01, ifFDR = TRUE,setName = "Hallmark gene sets")
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
grid.draw(enBar2)

Heatmap of genes up-regulated in glycolysis pathway

# load genes in the gene set
gsc <- loadGSC(gmts$H)
geneList <- gsc$gsc$HALLMARK_GLYCOLYSIS

#select differentially expressed genes
fdrCut <- 0.10
sigDE <- filter(resTab, adj.P.Val < fdrCut, logFC > 0) %>% 
  filter(GENE_SYMBOL %in% geneList) %>%
  arrange(desc(logFC)) %>% distinct(GENE_SYMBOL, .keep_all = TRUE)

#get the expression matrix
plotMat <- exprs(gse.vst[match(sigDE$NAME,fData(gse.vst)$NAME),])
rownames(plotMat) <- sigDE$GENE_SYMBOL

#sort columns of plot matrix based on treatment status
plotMat <- plotMat[,order(gse.vst$treatment)]

annoCol <- data.frame(row.names = colnames(gse.vst), `treatment` = gse.vst$treatment)

Plot the heatmap

#color for colum annotation
annoColor <- list(treatment = c(CPG= "red", untreated = "grey80"))

hallHeatmap2 <- pheatmap(plotMat, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100), scale = "row",
         cluster_cols = FALSE, cluster_rows = FALSE,
         annotation_col = annoCol, annotation_colors = annoColor,
         show_colnames = FALSE, fontsize_row = 8, breaks = seq(-5,5, length.out = 101), treeheight_row = 0,
         border_color = NA, main = "HALLMARK_GLYCOLYSIS (GSE30105)",silent = TRUE)$gtable

grid.draw(hallHeatmap2)

9.8 Combine the figures for supplementary material

p <- ggdraw() + draw_plot(enBar1, 0, 0.5, 0.6, 0.48) + 
  draw_plot(hallHeatmap1, 0.6, 0.5, 0.4, 0.48) +
  draw_plot(enBar2, 0, 0.1, 0.6, 0.38) +
  draw_plot(hallHeatmap2, 0.6,0, 0.4,0.48) +
  draw_plot_label(c("A","B","C","D"), c(0, 0.55, 0, 0.55), c(1, 1, 0.49, 0.49), fontface = "plain", size=20)
plot_grid(p)


10 Association between the clinical phenotype and energy metabolic features

Load datasets

data("patmeta", "seaCombat","lpdAll", "pretreat","doublingTime")

10.0.1 Correlation between seahorse measurements and pretreatment status

Prepare data table for t-test

testTab <- assays(seaCombat)$seaMedian %>% data.frame() %>% rownames_to_column("Measurement") %>%
  gather(key = "patID", value = "value", -Measurement) %>%
  mutate(pretreated = pretreat[patID,]) %>%
  mutate(IGHV = factor(Biobase::exprs(lpdAll)["IGHV Uppsala U/M",patID])) %>% 
  as.tibble()

Performing t-test for each measurement

tTest <- function(x, y) {
  noNA <- !is.na(x) & !is.na(y)
  res <- t.test(x[noNA] ~ y[noNA], equal.var = TRUE)
  tibble(p = res$p.value,
         dm = res$estimate[[2]] - res$estimate[[1]])
}

pTab <- group_by(testTab, Measurement) %>% do(tTest(.$value, .$pretreated)) %>% ungroup() %>%
  mutate(p.adj = p.adjust(p, method = "BH"))

Perform ANOVA test, including IGHV as a blocking factor

aovTest <- function(x,y,block) {
  noNA <- !is.na(x) & !is.na(y) & !is.na(block)
  dataTab <- data.frame(x = x[noNA], y = y[noNA], block = block[noNA])
  res <- anova(lm(x ~ block + y))
  tibble(p = res$`Pr(>F)`[2])
}

pTab.IGHV <- group_by(testTab, Measurement) %>% do(aovTest(.$value, .$pretreated, .$IGHV)) %>%
  ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))

A table as output

outTable <- tibble(`Seahorse mearuement` = formatSea(pTab$Measurement),
                   `p value` = format(pTab$p, digits = 2),
                   `adjusted p` = format(pTab$p.adj, digits = 3),
                   `p value (IGHV blocked)` = format(pTab.IGHV$p, digits = 2),
                   `adjusted p (IGHV blocked)` = format(pTab.IGHV$p.adj, digits = 3))

write(
print(xtable(outTable, digits = 3, 
             caption = "Student's t-test and ANOVA test (IGHV blocked) results of energy metabolic measurements related to pretreatment status"), 
      include.rownames=FALSE,
      caption.placement = "top")
,file = paste0(plotDir,"tTest_SeahorseVSpretreat.tex"))

Association between IGHV status and pretreatment

IGHVtab <- filter(testTab, !duplicated(patID))
chiRes <- chisq.test(IGHVtab$pretreated, IGHVtab$IGHV)
chiRes
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  IGHVtab$pretreated and IGHVtab$IGHV
## X-squared = 11.775, df = 1, p-value = 0.0006002

10.0.2 Plot significant associations as beeswarms plot (for supplementary figures)

Plot

seaList <- c("glycolytic.capacity", "glycolytic.reserve")
plotList <- lapply(seaList, function(seaName) {
  #prepare table for plotting
  plotTab <- filter(testTab, Measurement == seaName) %>% 
    mutate(Measurement = formatSea(Measurement),
           pretreated = ifelse(pretreated, 
                               sprintf("pretreated (n=%s)", sum(pretreated == 1)), 
                               sprintf("untreated (n=%s)", sum(pretreated == 0))),
           IGHV = ifelse(is.na(IGHV), "unknown", 
                         ifelse(IGHV == 1, "mutated", "unmutated"))) %>%
    mutate(IGHV = factor(IGHV, levels = c("mutated","unmutated","unknown"))) %>%
    filter(!is.na(value))
  
  #p value for annotation
  pval <- filter(pTab, Measurement == seaName)$p
  
  #color for IGHV status
  colorList <- c(mutated = "red", unmutated = "black", unknown = "grey80")
  
  #formatted seaname
  seaTitle <- unique(plotTab$Measurement)
  
  #title 
  plotTitle <- paste(sprintf("'%s (p = '~",seaTitle),
                     sciPretty(pval, digits = 2),"*')'")
  
  ggplot(plotTab, aes(x=pretreated, y = value)) + 
    stat_boxplot(geom = "errorbar", width = 0.3) +
    geom_boxplot(outlier.shape = NA, col="black", width=0.4) + 
    geom_beeswarm(cex=2, size =1, aes(col = IGHV)) + theme_classic() +
    xlab("") + ylab("ECAR (pMol/min)") + ggtitle(parse(text=plotTitle)) + 
    scale_color_manual(values = colorList) +
    theme(axis.line.x = element_blank(), axis.ticks.x = element_blank(),
           axis.title = element_text(size=12, face="bold"),
           axis.text = element_text(size=12),
           plot.title = element_text(hjust=0.5,size=15),
           axis.title.x = element_text(face="bold"))
    
})

Save the plot

grid.arrange(grobs = plotList, ncol =2)

10.1 Correlation between seahorse measurement and lymphocyte doubling time

10.1.1 Correlation test

Pre-processing data

testTab <- assays(seaCombat)$seaMedian %>% data.frame() %>% rownames_to_column("Measurement") %>%
  gather(key = "patID", value = "value", -Measurement) %>%
  mutate(pretreated = pretreat[patID,],
         doubling.time = LDT[patID,]) %>%
  mutate(pretreated = factor(pretreated),
         IGHV = factor(Biobase::exprs(lpdAll)["IGHV Uppsala U/M",patID])) %>% 
  filter(!is.na(doubling.time), !is.na(value)) %>%
  as.tibble()

Correlation test between seahorse measurement and doubling time

corTest <- function(x, y, block = NULL, method = "pearson") {
  if (is.null(block)) {
    res <- cor.test(x,y, method = method)
    tibble(p = res$p.value, coef = res$estimate[[1]])
  } else {
    tab <- data.frame(x = x, y = y, block = block)
    res <- summary(lm( y ~ x + block, tab)) #how much y can be explained by x and block
    tibble(p = res$coefficients[2,4],
           coef = cor(x,y))
  }
}

corRes <- group_by(testTab, Measurement) %>% do(corTest(.$value, .$doubling.time)) %>%
  ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))

Correlations between lymphocyte doubling time and IGHV stauts

pRes <- filter(testTab, !duplicated(patID)) %>% 
  do(data.frame(p = t.test(doubling.time ~ IGHV,.)$p.value))
pRes
##              p
## 1 2.635161e-07

Correlation test between seahorse measurement and doubling time (Considering IGHV as cofactor)

corRes.aov <- group_by(testTab, Measurement) %>% filter(!is.na(IGHV)) %>%
  do(corTest(x = .$value, y = .$doubling.time, block = .$IGHV)) %>%
  ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))

Correlation test between seahorse measurement and doubling time (within M-CLL)

corRes.M <- group_by(testTab, Measurement) %>% filter(IGHV %in% 1) %>%
  do(corTest(x = .$value, y = .$doubling.time)) %>%
  ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))

Correlation test between seahorse measurement and doubling time (within U-CLL)

corRes.U <- group_by(testTab, Measurement) %>% filter(IGHV %in% 0) %>%
  do(corTest(x = .$value, y = .$doubling.time)) %>%
  ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))

A table for output

outTable <- tibble(`Seahorse mearuement` = formatSea(corRes$Measurement),
                   `p value` = format(corRes$p, digits = 2),
                   `adjusted p` = format(corRes$p.adj, digits = 3),
                   `p value (IGHV blocked)` = format(corRes.aov$p, digits = 3),
                   `adjusted p (IGHV blocked)` = format(corRes.aov$p.adj, digits = 3))

write(
print(xtable(outTable, digits = 3, 
             caption = "Correlation tests between each Seahorse measurements and lymphocyte doubling time"), 
      include.rownames=FALSE,
      caption.placement = "top")
,paste0(plotDir,"tTest_SeahorseVSldt.tex"))

10.1.2 Plot correlations (for supplementary figure)

seaList <- c("glycolysis", "glycolytic.capacity")
plotList.cor <- lapply(seaList, function(seaName) {
  #prepare table for plotting
  plotTab <- filter(testTab, Measurement == seaName) %>% 
    mutate(Measurement = formatSea(Measurement),
           IGHV = ifelse(is.na(IGHV), "unknown", 
                         ifelse(IGHV == 1, "mutated", "unmutated"))) %>%
    mutate(IGHV = factor(IGHV, levels = c("mutated","unmutated","unknown"))) %>%
    filter(!is.na(value), !is.na(doubling.time))
  
  #p value for annotation
  pval <- filter(corRes, Measurement == seaName)$p
  coef <- filter(corRes, Measurement == seaName)$coef
  
  #color for IGHV status
  colorList <- c(mutated = "red", unmutated = "black", unknown = "grey80")
  
  #formatted seaname
  seaTitle <- unique(plotTab$Measurement)
  
  
  #prepare correlation test annotations
  annoText <- paste("'coefficient ='~",format(coef,digits = 2),"*","', p ='~",sciPretty(pval,digits=2))
  
  ggplot(plotTab, aes(x=value,y=doubling.time)) + 
    geom_point(size=1, aes(col = IGHV)) + 
    geom_smooth(method = "lm", se= FALSE) +
    xlab("ECAR (pMol/min)") + ylab("Lymphocyte doubling time (days)") +
    theme_bw() + ggtitle(seaTitle) +
    scale_color_manual(values = colorList) +
    annotate("text", x = -Inf, y = Inf, label = annoText, 
             vjust=1, hjust=0, size = 5, parse = TRUE, col= "darkred") +
    theme(panel.grid = element_blank(),
          axis.title = element_text(size=13, face="bold"),
          axis.text = element_text(size=12),
          plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
          legend.position = c(0.9,0.1),
          axis.title.x = element_text(face="bold"),
          plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))
    
})

Show the plot

grid.arrange(grobs = plotList.cor, ncol =2)

10.2 Correlation between seahorse measurement and survival

10.2.1 Prepare dataset

seaTable <- assays(seaCombat)$seaMedian

survT = patmeta[colnames(seaTable),]
survT[which(survT[,"IGHV"]=="U") ,"IGHV"] = 0
survT[which(survT[,"IGHV"]=="M") ,"IGHV"] = 1
survT$IGHV = as.numeric(survT$IGHV)
survT$pretreatment <- pretreat[rownames(survT),]
colnames(survT) = gsub("Age4Main", "age", colnames(survT))
survT$treatment <- survT$treatedAfter | survT$pretreatment
#add seahorse measurement information
survT <- cbind(survT, t(seaTable))

10.2.2 Univariate survival analysis

10.2.2.1 Caclulation correlations

Function to calculate correlations

com <- function( Time, endpoint, scaleX, sub, d, split, drug_names) {  
  
  res <- lapply(d, function(g)  { 
  
  #drug <- survT[,g] * scaleX
  drug <- survT[,g]
  #drug <- (drug - mean(drug, na.rm = TRUE))/sd(drug, na.rm=TRUE)
  ## all=99, M-CLL=1, U-CLL=0
  if(sub==99) { surv <- coxph(Surv(survT[,paste0(Time)],
                                   survT[,paste0(endpoint)] == TRUE) ~ drug)} 
  if(sub<99)  { surv <- coxph(Surv(survT[,paste0(Time)],
                                   survT[,paste0(endpoint)] == TRUE) ~ drug,
                              subset=survT[,paste0(split)]==sub)}    
  
  c(summary(surv)[[7]][,5], summary(surv)[[7]][,2], 
                summary(surv)[[8]][,3], 
                summary(surv)[[8]][,4])
 })
 s <- do.call(rbind, res)
 colnames(s) <- c("p", "HR", "lower", "higher")
 rownames(s) <- drug_names
 s
}

All samples

d <- rownames(seaTable)
sea_names <- formatSea(d)

ttt <- com(Time="T5", endpoint="treatedAfter", sub=99, d=d,
          split="", drug_names=sea_names, scaleX=1)

   
os <-  com(Time="T6", endpoint="died", sub=99, d=d, split="",
          drug_names=sea_names, scaleX=1)

#TTT result
ttt
##                                     p        HR      lower   higher
## basal respiration          0.78487361 1.0045508 0.97232990 1.037839
## ATP production             0.61856940 1.0089384 0.97420635 1.044909
## proton leak                0.79523162 0.9941888 0.95137805 1.038926
## maximal respiration        0.04329390 1.0083468 1.00025052 1.016509
## spare respiratory capacity 0.02330588 1.0109372 1.00148009 1.020484
## glycolysis                 0.43103151 1.0277871 0.96000913 1.100350
## glycolytic capacity        0.05305026 1.0258682 0.99966404 1.052759
## glycolytic reserve         0.03378313 1.0358886 1.00270606 1.070169
## ECAR/OCR                   0.17679455 0.2350961 0.02876541 1.921411
## OCR                        0.10457119 1.0250543 0.99487711 1.056147
## ECAR                       0.57398172 0.9790316 0.90930921 1.054100
#OS result
os
##                                      p        HR      lower    higher
## basal respiration          0.445743838 1.0209332 0.96799051  1.076772
## ATP production             0.054477450 1.0627155 0.99883212  1.130685
## proton leak                0.230798917 0.9585454 0.89441307  1.027276
## maximal respiration        0.241401237 1.0082235 0.99450296  1.022133
## spare respiratory capacity 0.260001165 1.0093938 0.99310455  1.025950
## glycolysis                 0.153194519 1.0908935 0.96813843  1.229213
## glycolytic capacity        0.005552633 1.0728828 1.02084207  1.127576
## glycolytic reserve         0.003918966 1.0943347 1.02931759  1.163459
## ECAR/OCR                   0.867915535 1.3239198 0.04849462 36.143461
## OCR                        0.727618843 1.0086884 0.96076018  1.059008
## ECAR                       0.623678252 1.0313524 0.91169617  1.166713

M-CLL samples

d <- rownames(seaTable)
sea_names <- formatSea(d)

ttt.M <- com(Time="T5", endpoint="treatedAfter", sub=1, d=d,
          split="IGHV", drug_names=sea_names, scaleX=1)

   
os.M <-  com(Time="T6", endpoint="died", sub=1, d=d, 
           split="IGHV",drug_names=sea_names, scaleX=1)

#TTT result
ttt.M
##                                     p         HR        lower   higher
## basal respiration          0.66972710 1.01302726 0.9545326975 1.075106
## ATP production             0.26823880 1.03545664 0.9735172775 1.101337
## proton leak                0.39840912 0.96862771 0.8995549818 1.043004
## maximal respiration        0.53665512 1.00492687 0.9893814783 1.020717
## spare respiratory capacity 0.57126071 1.00514690 0.9874422205 1.023169
## glycolysis                 0.19675546 0.91814324 0.8064834965 1.045263
## glycolytic capacity        0.83294236 0.99483645 0.9481134323 1.043862
## glycolytic reserve         0.65507343 1.01398165 0.9540555009 1.077672
## ECAR/OCR                   0.08563539 0.02940216 0.0005271614 1.639891
## OCR                        0.28309251 1.03189088 0.9744044931 1.092769
## ECAR                       0.23953728 0.92290950 0.8074193715 1.054919
#OS result
os.M
##                                     p        HR      lower       higher
## basal respiration          0.30412487 0.9499771 0.86140242    1.0476596
## ATP production             0.27513686 1.0632632 0.95234566    1.1870990
## proton leak                0.01414948 0.8720443 0.78169459    0.9728369
## maximal respiration        0.34145002 0.9847134 0.95395111    1.0164676
## spare respiratory capacity 0.46372706 0.9870185 0.95311644    1.0221264
## glycolysis                 0.95777589 1.0058566 0.81031426    1.2485865
## glycolytic capacity        0.13875994 1.0781006 0.97593525    1.1909610
## glycolytic reserve         0.06252895 1.1162450 0.99426344    1.2531918
## ECAR/OCR                   0.46124679 7.5627436 0.03477659 1644.6435373
## OCR                        0.29729407 0.9364032 0.82755839    1.0595638
## ECAR                       0.97528422 1.0035431 0.80235073    1.2551851

U-CLL samples

d <- rownames(seaTable)
sea_names <- formatSea(d)

ttt.U <- com(Time="T5", endpoint="treatedAfter", sub=0, d=d,
          split="IGHV", drug_names=sea_names, scaleX=1)

   
os.U <-  com(Time="T6", endpoint="died", sub=0, d=d, 
           split="IGHV",drug_names=sea_names, scaleX=1)

#TTT result
ttt.U
##                                     p        HR      lower    higher
## basal respiration          0.57970659 0.9884185 0.94849803  1.030019
## ATP production             0.45865707 0.9827452 0.93853098  1.029042
## proton leak                0.86897345 1.0054747 0.94232066  1.072861
## maximal respiration        0.27177986 1.0054096 0.99578218  1.015130
## spare respiratory capacity 0.14010915 1.0085995 0.99719158  1.020138
## glycolysis                 0.38446733 1.0468223 0.94424958  1.160537
## glycolytic capacity        0.06412587 1.0360918 0.99792112  1.075723
## glycolytic reserve         0.04355106 1.0517270 1.00146086  1.104516
## ECAR/OCR                   0.63296402 0.4761677 0.02265924 10.006323
## OCR                        0.51066854 1.0123061 0.97607840  1.049878
## ECAR                       0.69072636 0.9805743 0.89025029  1.080062
#OS result
os.U
##                                     p        HR       lower    higher
## basal respiration          0.35128211 1.0296727 0.968269501  1.094970
## ATP production             0.40353629 1.0318067 0.958719943  1.110465
## proton leak                0.73041348 1.0175484 0.921682426  1.123386
## maximal respiration        0.25523712 1.0084698 0.993923097  1.023229
## spare respiratory capacity 0.28477364 1.0095579 0.992113741  1.027309
## glycolysis                 0.30623093 1.0918435 0.922700156  1.291993
## glycolytic capacity        0.06146802 1.0659453 0.996936433  1.139731
## glycolytic reserve         0.05659909 1.0861820 0.997679695  1.182535
## ECAR/OCR                   0.95731501 0.8838535 0.009613555 81.259948
## OCR                        0.82813436 1.0059134 0.953769322  1.060908
## ECAR                       0.74229021 1.0253082 0.883397069  1.190016

10.2.3 KM plot

Function for KM plot

ggkm <- function(response, time, endpoint, titlePlot = "KM plot", pval = NULL, stat = "median") { 
  #function for km plot
  survS <- data.frame(time = time,
                      endpoint = endpoint)
  
  if (stat == "maxstat") {
    ms <- maxstat.test(Surv(time, endpoint)  ~ response, 
                               data = survS,
                               smethod = "LogRank",
                               minprop = 0.2, 
                               maxprop = 0.8, 
                               alpha = NULL)
    
    survS$group <- factor(ifelse(response >= ms$estimate, "high", "low"))
    
  } else if (stat == "median") {
    med <- median(response, na.rm = TRUE)
    survS$group <- factor(ifelse(response >= med, "high", "low"))
  } else if (stat == "binary") {
    survS$group <- factor(response)
  }
  
  if (is.null(pval)) pval = TRUE
  p <- ggsurvplot(survfit(Surv(time, endpoint) ~ group, data = survS), 
                              data = survS, pval = TRUE,  conf.int = TRUE,

                              ylab = "Fraction", xlab = "Time (years)", title = titlePlot,
                  ggtheme = theme_bw() + theme(plot.title = element_text(hjust =0.5)))$plot
  
  p
}

#a fucntion to assign unit, either ECAR or OCR or none
giveUnit <- function(x) {
  ocrList <- c("basal.respiration","ATP.production","proton.leak","maximal.respiration",
               "spare.respiratory.capacity","OCR")
  ecarList <- c("glycolysis","glycolytic.capacity","glycolytic.reserve","ECAR")
  if (x %in%  ocrList) 
    return("OCR")
  else if (x %in% ecarList) 
    return("ECAR")
  else return("")
}

10.2.3.1 Stratrified by metabolism

TTT

tttList <- lapply(rownames(seaCombat), function(n) {
  survS <- data.frame(time = survT$T5,
                      endpoint = survT$treatedAfter)
  val <- survT[[n]]
  ms <- maxstat.test(Surv(time, endpoint)  ~ val, 
                               data = survS,
                               smethod = "LogRank",
                               minprop = 0.2, 
                               maxprop = 0.8, 
                               alpha = NULL)
  
   plotTab <- survS %>% mutate(response = val) %>%
    filter(!is.na(response), !is.na(endpoint),!is.na(time)) %>%
    mutate(group = ifelse(response >= ms$estimate, 
                          sprintf("high %s (%s >= %1.1f, n=",formatSea(n),giveUnit(n),ms$estimate), 
                          sprintf("low %s (%s < %1.1f, n=",formatSea(n),giveUnit(n),ms$estimate)))
  nTab <- table(plotTab$group)
  plotTab <- mutate(plotTab, group = paste0(group,nTab[group],")"))
  
  pval <- sprintf("p = %1.3f",ttt[formatSea(n),"p"])
  p <- ggsurvplot(survfit(Surv(time, endpoint) ~ group, data = plotTab), 
                              data = plotTab, pval = pval,  conf.int = FALSE,
                  legend = c(0.6, 0.12),pval.coord = c(0.1,0.25),
                  legend.labs = sort(unique(plotTab$group)),
                  legend.title = "group", palette = "Dark2",
                  ylab = "Fraction treatment free", xlab = "Time (years)", title = formatSea(n),
                  ggtheme = theme_classic() + theme(axis.title = element_text(size=13, face="bold"),
                                                    axis.text = element_text(size=12),
                                                    plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
                                                    axis.title.x = element_text(face="bold"),
                                                    plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm")))$plot
})
names(tttList) <- names(seaCombat)

OS

osList <- lapply(rownames(seaCombat), function(n) {
  survS <- data.frame(time = survT$T6,
                      endpoint = survT$died)
  val <- survT[[n]]
  ms <- maxstat.test(Surv(time, endpoint)  ~ val, 
                               data = survS,
                               smethod = "LogRank",
                               minprop = 0.2, 
                               maxprop = 0.8, 
                               alpha = NULL)
  
   plotTab <- survS %>% mutate(response = val) %>%
    filter(!is.na(response), !is.na(endpoint),!is.na(time)) %>%
    mutate(group = ifelse(response >= ms$estimate, 
                          sprintf("high %s (%s >= %1.1f, n=",formatSea(n),giveUnit(n),ms$estimate), 
                          sprintf("low %s (%s < %1.1f, n=",formatSea(n),giveUnit(n),ms$estimate)))
  nTab <- table(plotTab$group)
  plotTab <- mutate(plotTab, group = paste0(group,nTab[group],")"))
  
  pval <- sprintf("p = %1.3f",os[formatSea(n),"p"])
  p <- ggsurvplot(survfit(Surv(time, endpoint) ~ group, data = plotTab), 
                              data = plotTab, pval = pval,  conf.int = FALSE,
                  legend = c(0.6,0.12),pval.coord = c(0.1,0.25),
                  legend.labs = sort(unique(plotTab$group)),
                  legend.title = "group", palette = "Dark2",
                  ylab = "Fraction overall survival", xlab = "Time (years)", title = formatSea(n),
                  ggtheme = theme_classic() + theme(axis.title = element_text(size=13, face="bold"),
                                                    axis.text = element_text(size=12),
                                                    plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
                                                    axis.title.x = element_text(face="bold"),
                                                    plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm")))$plot
})
names(osList) <- names(seaCombat)

KM plot for significant assocations in single variante cox model

p<-plot_grid(tttList$glycolytic.reserve, tttList$maximal.respiration,
          tttList$spare.respiratory.capacity,
          osList$glycolytic.capacity, osList$glycolytic.reserve,
          labels = NULL)
p

10.2.3.2 Straitified by both metabolism and IGHV status

TTT

tttKm <- lapply(rownames(seaCombat), function(n) {
  survS <- data.frame(time = survT$T5,
                      endpoint = survT$treatedAfter)
  val <- survT[[n]]
  ms <- maxstat.test(Surv(time, endpoint)  ~ val, 
                               data = survS,
                               smethod = "LogRank",
                               minprop = 0.2, 
                               maxprop = 0.8, 
                               alpha = NULL)
  
   plotTab <- survS %>% mutate(response = val, IGHV = survT$IGHV) %>%
    filter(!is.na(response),!is.na(IGHV), !is.na(endpoint),!is.na(time)) %>%
    mutate(group = ifelse(response >= ms$estimate, 
                          sprintf("high %s (%s >= %1.1f, n=",formatSea(n),giveUnit(n),ms$estimate), 
                          sprintf("low %s (%s < %1.1f, n=",formatSea(n),giveUnit(n),ms$estimate))) %>%
    mutate(group = ifelse(IGHV==1, paste0("M-CLL with ",group), paste0("U-CLL with ",group)))
  nTab <- table(plotTab$group)
  plotTab <- mutate(plotTab, group = paste0(group,nTab[group],")"))
  p <- ggsurvplot(survfit(Surv(time, endpoint) ~ group, data = plotTab), 
                              data = plotTab, pval = TRUE,  conf.int = FALSE,
                  legend = c(0.6,0.2),legend.labs = sort(unique(plotTab$group)),
                   pval.coord = c(0.1,0.4),
                  legend.title = "group", palette = "Dark2",
                  ylab = "Fraction treatment free", xlab = "Time (years)", title = formatSea(n),
                  ggtheme = theme_classic() + theme(axis.title = element_text(size=13, face="bold"),
                                                    axis.text = element_text(size=12),
                                                    plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
                                                    axis.title.x = element_text(face="bold"),
                                                    plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm")))$plot
})
names(tttKm) <- names(seaCombat)

OS

osKm <- lapply(rownames(seaCombat), function(n) {
  survS <- data.frame(time = survT$T6,
                      endpoint = survT$died)
  val <- survT[[n]]
  ms <- maxstat.test(Surv(time, endpoint)  ~ val, 
                               data = survS,
                               smethod = "LogRank",
                               minprop = 0.2, 
                               maxprop = 0.8, 
                               alpha = NULL)
  plotTab <- survS %>% mutate(response = val, IGHV = survT$IGHV) %>%
    filter(!is.na(response),!is.na(IGHV), !is.na(endpoint),!is.na(time)) %>%
    mutate(group = ifelse(response >= ms$estimate, 
                          sprintf("high %s (%s >= %1.1f, n=",formatSea(n),giveUnit(n),ms$estimate), 
                          sprintf("low %s (%s < %1.1f, n=",formatSea(n),giveUnit(n),ms$estimate))) %>%
    mutate(group = ifelse(IGHV==1, paste0("M-CLL with ",group), paste0("U-CLL with ",group)))
  nTab <- table(plotTab$group)
  plotTab <- mutate(plotTab, group = paste0(group,nTab[group],")"))
  p <- ggsurvplot(survfit(Surv(time, endpoint) ~ group, data = plotTab), 
                              data = plotTab, pval = TRUE,  conf.int = FALSE,
                  legend = c(0.6,0.2),legend.labs = sort(unique(plotTab$group)),
                  pval.coord = c(0.1,0.4),
                  legend.title = "group", palette = "Dark2",
                  ylab = "Fraction overall survival", xlab = "Time (years)", title = formatSea(n),
                  ggtheme = theme_classic() + theme(axis.title = element_text(size=13, face="bold"),
                                                    axis.text = element_text(size=12),
                                                    plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
                                                    axis.title.x = element_text(face="bold"),
                                                    plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm")))$plot
})
names(osKm) <- names(seaCombat)

10.3 Multi-variate cox model

10.3.1 Prepare dataset

#add genetic variants to survival table
survT$SF3B1      <- Biobase::exprs(lpdAll)[ "SF3B1",      rownames(survT)  ]
survT$NOTCH1     <- Biobase::exprs(lpdAll)[ "NOTCH1",     rownames(survT)  ]
survT$BRAF       <- Biobase::exprs(lpdAll)[ "BRAF",       rownames(survT)  ]
survT$TP53       <- Biobase::exprs(lpdAll)[ "TP53",       rownames(survT)  ]
survT$del17p13   <- Biobase::exprs(lpdAll)[ "del17p13",   rownames(survT)  ]
survT$del11q22.3 <- Biobase::exprs(lpdAll)[ "del11q22.3", rownames(survT)  ]
survT$trisomy12 <-  Biobase::exprs(lpdAll)[ "trisomy12", rownames(survT)  ]
extractSome <- function(x) {
  sumsu <- summary(x)
  data.frame(
    `p-value`      = 
      sprintf("%6.3g", sumsu[["coefficients"]][, "Pr(>|z|)"]),
    `HR`           = 
      sprintf("%6.3g", signif( sumsu[["coefficients"]][, "exp(coef)"], 2) ), 
    `lower 95% CI` = 
      sprintf("%6.3g", signif( sumsu[["conf.int"]][, "lower .95"], 2) ),
    `upper 95% CI` = 
      sprintf("%6.3g", signif( sumsu[["conf.int"]][, "upper .95"], 2),
              check.names = FALSE) )
}

Define covariates and effects.

10.3.1.1 TTT

glycolytic reserve

surv1 <- coxph(
  Surv(T5, treatedAfter) ~  
    age +
    as.factor(trisomy12) +
    as.factor(del11q22.3) +
    as.factor(del17p13) +
    as.factor(TP53) +
    as.factor(IGHVwt) +
    glycolytic.reserve,       # continuous
  data = survT )

colFactor <- data.frame(factor = c("age",
                                   "trisomy12", "del11q22.3", 
                                   "del17p13","TP53","U-CLL",
                                   "glycolytic reserve"))

outTab <- cbind(colFactor,extractSome(surv1))

write(print(xtable(outTab,
            caption = "Multivariate Cox regression model for time to treatment with glycolytic reserve as a covariate"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = paste0(plotDir,"glyRes_TTT.tex"))
## % latex table generated in R 3.5.0 by xtable 1.8-3 package
## % Fri Feb  8 17:21:05 2019
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for time to treatment with glycolytic reserve as a covariate} 
## \begin{tabular}{lllll}
##   \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\ 
##   \hline
## age & 0.0397 &   0.77 &   0.61 &   0.99 \\ 
##   trisomy12 &  0.594 &    1.3 &   0.53 &    3.1 \\ 
##   del11q22.3 &  0.622 &    1.2 &   0.55 &    2.7 \\ 
##   del17p13 &  0.556 &    1.3 &   0.55 &      3 \\ 
##   TP53 & 0.0125 &    2.6 &    1.2 &    5.6 \\ 
##   U-CLL &  0.108 &    1.8 &   0.88 &    3.6 \\ 
##   glycolytic reserve &  0.095 &      1 &   0.99 &    1.1 \\ 
##    \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n", 
            summary(surv1)$n, summary(surv1)[6] ) )
## 89 patients considerd in the model; number of events 47

maximal respiration

surv1 <- coxph(
  Surv(T5, treatedAfter) ~  
    age +
    as.factor(trisomy12) +
    as.factor(del11q22.3) +
    as.factor(del17p13) +
    as.factor(TP53) +
    IGHVwt +
    maximal.respiration,       # continuous
  data = survT )

colFactor <- data.frame(factor = c("age", 
                                   "trisomy12", "del11q22.3", 
                                   "del17p13","TP53","U-CLL",
                                   "maximal respiration"))

outTab <- cbind(colFactor,extractSome(surv1))

write(print(xtable(outTab,
            caption = "Multivariate Cox regression model for time to treatment with maximal respiration as a covariate"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = paste0(plotDir,"maxRes_TTT.tex"))
## % latex table generated in R 3.5.0 by xtable 1.8-3 package
## % Fri Feb  8 17:21:05 2019
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for time to treatment with maximal respiration as a covariate} 
## \begin{tabular}{lllll}
##   \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\ 
##   \hline
## age & 0.0169 &   0.77 &   0.62 &   0.95 \\ 
##   trisomy12 &  0.336 &    1.5 &   0.66 &    3.3 \\ 
##   del11q22.3 &   0.18 &    1.6 &   0.79 &    3.4 \\ 
##   del17p13 &  0.581 &    1.3 &   0.54 &      3 \\ 
##   TP53 & 0.00532 &      3 &    1.4 &    6.4 \\ 
##   U-CLL & 0.0354 &      2 &      1 &    3.8 \\ 
##   maximal respiration & 0.0743 &      1 &      1 &      1 \\ 
##    \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n", 
            summary(surv1)$n, summary(surv1)[6] ) )
## 102 patients considerd in the model; number of events 55

spare respiratory capacity

surv1 <- coxph(
  Surv(T5, treatedAfter) ~  
    age +
    as.factor(trisomy12) +
    as.factor(del11q22.3) +
    as.factor(del17p13) +
    as.factor(TP53) +
    IGHVwt +
    spare.respiratory.capacity,       # continuous
  data = survT )

colFactor <- data.frame(factor = c("age", 
                                   "trisomy12", "del11q22.3", 
                                   "del17p13","TP53","U-CLL",
                                   "spare.respiratory.capacity"))

outTab <- cbind(colFactor,extractSome(surv1))

write(print(xtable(outTab,
            caption = "Multivariate Cox regression model for time to treatment with glycolytic reserve as a covariate"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = paste0(plotDir,"spResCap_TTT.tex"))
## % latex table generated in R 3.5.0 by xtable 1.8-3 package
## % Fri Feb  8 17:21:06 2019
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for time to treatment with glycolytic reserve as a covariate} 
## \begin{tabular}{lllll}
##   \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\ 
##   \hline
## age & 0.0191 &   0.77 &   0.62 &   0.96 \\ 
##   trisomy12 &  0.328 &    1.5 &   0.67 &    3.4 \\ 
##   del11q22.3 &  0.187 &    1.6 &   0.79 &    3.4 \\ 
##   del17p13 &  0.572 &    1.3 &   0.54 &      3 \\ 
##   TP53 & 0.00743 &    2.9 &    1.3 &    6.1 \\ 
##   U-CLL & 0.0332 &      2 &    1.1 &    3.8 \\ 
##   spare.respiratory.capacity & 0.0672 &      1 &      1 &      1 \\ 
##    \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n", 
            summary(surv1)$n, summary(surv1)[6] ) )
## 102 patients considerd in the model; number of events 55

10.3.2 OS

glycolytic reserve

surv1 <- coxph(
  Surv(T6, died) ~  
    age + as.factor(treatment) +
    as.factor(trisomy12) +
    as.factor(del11q22.3) +
    as.factor(del17p13) +
    as.factor(TP53) +
    IGHVwt +
    glycolytic.reserve,       # continuous
  data = survT )

colFactor <- data.frame(factor = c("age", "treatment",
                                   "trisomy12", "del11q22.3", 
                                   "del17p13","TP53","U-CLL",
                                   "glycolytic reserve"))

outTab <- cbind(colFactor,extractSome(surv1))

write(print(xtable(outTab,
            caption = "Multivariate Cox regression model for overall survival with glycolytic reserve as a covariate"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = paste0(plotDir,"glyRes_OS.tex"))
## % latex table generated in R 3.5.0 by xtable 1.8-3 package
## % Fri Feb  8 17:21:06 2019
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for overall survival with glycolytic reserve as a covariate} 
## \begin{tabular}{lllll}
##   \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\ 
##   \hline
## age &  0.413 &    1.2 &   0.79 &    1.8 \\ 
##   treatment &  0.206 &    2.5 &   0.61 &    9.9 \\ 
##   trisomy12 &  0.265 &    2.4 &   0.52 &     11 \\ 
##   del11q22.3 &  0.629 &   0.71 &   0.17 &    2.9 \\ 
##   del17p13 &   0.79 &    0.8 &   0.16 &      4 \\ 
##   TP53 &  0.504 &    1.6 &   0.42 &    5.9 \\ 
##   U-CLL & 0.0947 &      3 &   0.83 &     11 \\ 
##   glycolytic reserve & 0.0325 &    1.1 &      1 &    1.2 \\ 
##    \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n", 
            summary(surv1)$n, summary(surv1)[6] ) )
## 95 patients considerd in the model; number of events 16
write.csv2(outTab, "glycolytic.reserve_VS_OS.csv")

glycolytic capacity

surv1 <- coxph(
  Surv(T6, died) ~  
    age + as.factor(treatment) +
    as.factor(trisomy12) +
    as.factor(del11q22.3) +
    as.factor(del17p13) +
    as.factor(TP53) +
    IGHVwt +
    glycolytic.capacity,       # continuous
  data = survT )

colFactor <- data.frame(factor = c("age", "treatment",
                                   "trisomy12", "del11q22.3", 
                                   "del17p13","TP53","U-CLL",
                                   "glycolytic capacity"))

outTab <- cbind(colFactor,extractSome(surv1))

write(print(xtable(outTab,
            caption = "Multivariate Cox regression model for overall survival with glycolytic capacity as a covariate"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = paste0(plotDir,"glyCap_OS.tex"))
## % latex table generated in R 3.5.0 by xtable 1.8-3 package
## % Fri Feb  8 17:21:06 2019
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for overall survival with glycolytic capacity as a covariate} 
## \begin{tabular}{lllll}
##   \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\ 
##   \hline
## age &  0.546 &    1.1 &   0.76 &    1.7 \\ 
##   treatment &  0.178 &    2.6 &   0.65 &     10 \\ 
##   trisomy12 &  0.312 &    2.2 &   0.48 &    9.7 \\ 
##   del11q22.3 &  0.494 &   0.61 &   0.15 &    2.5 \\ 
##   del17p13 &  0.644 &   0.68 &   0.13 &    3.6 \\ 
##   TP53 &  0.469 &    1.7 &   0.42 &    6.5 \\ 
##   U-CLL &  0.101 &    2.9 &   0.81 &     10 \\ 
##   glycolytic capacity &  0.046 &    1.1 &      1 &    1.1 \\ 
##    \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n", 
            summary(surv1)$n, summary(surv1)[6] ))
## 95 patients considerd in the model; number of events 16
write.csv2(outTab, "glycolytic.capacity_VS_OS.csv")

11 Association between bioenergetic features and CD38/CD49d expression

11.1 Prepare data

Query CD38 and CD49d (ITGA4) expressions from RNAseq data

data("dds")

geneList <- c("CD38","ITGA4")
ddsSea <- dds[,dds$PatID %in% colnames(seaOri)]
ddsSea <- estimateSizeFactors(ddsSea)
ddsSea <- varianceStabilizingTransformation(ddsSea)
ddsSea <- ddsSea[rowData(ddsSea)$symbol %in% geneList,]
countSea <- assay(ddsSea)
rownames(countSea) <- rowData(ddsSea)$symbol
colnames(countSea) <- ddsSea$PatID
countSea <- data.frame(t(countSea)) %>% rownames_to_column("patID")

Combine the phenotype table and subset for patients with Seahorse measurement

phenoTab <- countSea[match(colnames(seaCombat), countSea$patID),] %>% 
  filter(!is.na(patID)) %>%
  mutate(IGHV = Biobase::exprs(lpdAll)["IGHV Uppsala U/M",patID]) %>%
  mutate(IGHV = ifelse(is.na(IGHV),NA, ifelse(IGHV ==1, "M","U")))

11.1.1 Correlation test

seaTab <- assay(seaCombat)[,phenoTab$patID]
stopifnot(all(colnames(seaTab) == phenoTab$patID))
corRes <- lapply(rownames(seaTab), function(seaName) {
  lapply(colnames(phenoTab)[2:3], function(phenoName){
    testTab <- data.frame(sea = seaTab[seaName,], pheno = phenoTab[[phenoName]], IGHV = as.factor(phenoTab$IGHV))
    lmRes <- summary(lm(pheno ~ sea, data = testTab, na.action = na.omit))
    lmRes.IGHV <- summary(lm(pheno ~ sea + IGHV, data = testTab, na.action = na.omit))
    data.frame(measurement = seaName, phenotype = phenoName,
               p = lmRes$coefficients[2,4], p.IGHV = lmRes.IGHV$coefficients[2,4],
               stringsAsFactors = FALSE)
  }) %>% bind_rows() 
}) %>% bind_rows() %>% arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH"), 
                               p.IGHV.adj = p.adjust(p.IGHV, method = "BH"))

Save a table for supplementary table

tabOut <- filter(corRes, p.adj <= 0.05) %>% mutate(measurement = formatSea(measurement)) %>%
  dplyr::rename(Measurement = measurement, Gene = phenotype,
        `p value` = p, `p value (IGHV blocked)` = p.IGHV,
        `adjusted p value` = p.adj, `adjusted p value (IGHV blocked)`= p.IGHV.adj)


write(print(xtable(tabOut, digits = 3, 
             caption = "Correlation test results between energy metabolic measurements and CD38/IGTA4(CD49d) expression"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = paste0(plotDir,"corTest_seaVSCD38.tex"))
## % latex table generated in R 3.5.0 by xtable 1.8-3 package
## % Fri Feb  8 17:21:06 2019
## \begin{table}[ht]
## \centering
## \caption{Correlation test results between energy metabolic measurements and CD38/IGTA4(CD49d) expression} 
## \begin{tabular}{llrrrr}
##   \hline
## Measurement & Gene & p value & p value (IGHV blocked) & adjusted p value & adjusted p value (IGHV blocked) \\ 
##   \hline
## glycolytic capacity & CD38 & 0.000 & 0.001 & 0.001 & 0.021 \\ 
##   glycolytic reserve & CD38 & 0.000 & 0.003 & 0.003 & 0.033 \\ 
##   glycolysis & CD38 & 0.001 & 0.019 & 0.004 & 0.083 \\ 
##   maximal respiration & CD38 & 0.002 & 0.032 & 0.009 & 0.083 \\ 
##   glycolysis & ITGA4 & 0.003 & 0.028 & 0.013 & 0.083 \\ 
##   ECAR & CD38 & 0.005 & 0.009 & 0.015 & 0.067 \\ 
##   spare respiratory capacity & CD38 & 0.005 & 0.072 & 0.015 & 0.121 \\ 
##   basal respiration & ITGA4 & 0.005 & 0.028 & 0.015 & 0.083 \\ 
##   basal respiration & CD38 & 0.006 & 0.027 & 0.015 & 0.083 \\ 
##   OCR & CD38 & 0.009 & 0.070 & 0.021 & 0.121 \\ 
##   ATP production & CD38 & 0.013 & 0.070 & 0.026 & 0.121 \\ 
##    \hline
## \end{tabular}
## \end{table}

Plot significant correlations

corRes.sig <- filter(corRes, p.IGHV.adj < 0.05)
pList.all <- lapply(seq(nrow(corRes.sig)), function(i) {
  seaName <- corRes.sig$measurement[i]
  phenoName <- corRes.sig$phenotype[i]
  plotTab <- data.frame(sea = seaTab[seaName,],
                        pheno = phenoTab[[phenoName]],
                        IGHV = phenoTab$IGHV) %>%
    mutate(IGHV = ifelse(is.na(IGHV),"unknown",ifelse(
      IGHV == "M", "mutated","unmutated")))
  corRes <- cor.test(plotTab$sea, plotTab$pheno)
    annoText <- paste("'coefficient ='~",format(corRes$estimate,digits = 2),"*","', p ='~",sciPretty(corRes$p.value,digits=2))
  ggplot(plotTab, aes(x=sea, y = pheno)) + 
    geom_point(aes(color = IGHV)) + geom_smooth(method = "lm", se=FALSE ) +
    theme_bw() + ylab(paste0(phenoName," (normalized expression)")) + xlab("ECAR (pMol/min)") +
    scale_color_manual(values = c(mutated = "red", unmutated = "black", unknown = "grey80")) + ggtitle(formatSea(seaName)) +
    annotate("text", x = -Inf, y = Inf, label = annoText, 
             vjust=1, hjust=0, size = 5, parse = TRUE, col= "darkred") +
    theme(panel.grid = element_blank(),
          axis.title = element_text(size=13, face="bold"),
          axis.text = element_text(size=12),
          plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
          legend.position = c(0.85,0.15),
          legend.background = element_rect(fill = NA),
          axis.title.x = element_text(face="bold"),
          plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))
})
grid.arrange(grobs = pList.all, ncol=2)

pList.U <- lapply(seq(nrow(corRes.sig)), function(i) {
  seaName <- corRes.sig$measurement[i]
  phenoName <- corRes.sig$phenotype[i]
  plotTab <- data.frame(sea = seaTab[seaName,],
                        pheno = phenoTab[[phenoName]],
                        IGHV = phenoTab$IGHV) %>%
  filter(IGHV == "U")
  corRes <- cor.test(plotTab$sea, plotTab$pheno)
    annoText <- paste("'coefficient ='~",format(corRes$estimate,digits = 2),"*","', p ='~",sciPretty(corRes$p.value,digits=2))
  ggplot(plotTab, aes(x=sea, y = pheno)) + 
    geom_point(color = "black") + geom_smooth(method = "lm", se=FALSE ) +
    theme_bw() + ylab(paste0(phenoName," (normalized expression)")) + xlab("ECAR (pMol/min)") +
    scale_color_manual(values = c(mutated = "red", unmutated = "black", unknown = "grey80")) + ggtitle(paste0(formatSea(seaName)," (U-CLL samples only)")) +
    annotate("text", x = -Inf, y = Inf, label = annoText, 
             vjust=1, hjust=0, size = 5, parse = TRUE, col= "darkred") +
    theme(panel.grid = element_blank(),
          axis.title = element_text(size=13, face="bold"),
          axis.text = element_text(size=12),
          plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
          legend.position = c(0.9,0.1),
          axis.title.x = element_text(face="bold"),
          plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))
})

pList.M <- lapply(seq(nrow(corRes.sig)), function(i) {
  seaName <- corRes.sig$measurement[i]
  phenoName <- corRes.sig$phenotype[i]
  plotTab <- data.frame(sea = seaTab[seaName,],
                        pheno = phenoTab[[phenoName]],
                        IGHV = phenoTab$IGHV) %>%
  filter(IGHV == "M")
  corRes <- cor.test(plotTab$sea, plotTab$pheno)
    annoText <- paste("'coefficient ='~",format(corRes$estimate,digits = 2),"*","', p ='~",sciPretty(corRes$p.value,digits=2))
  ggplot(plotTab, aes(x=sea, y = pheno)) + 
    geom_point(color = "red") + geom_smooth(method = "lm", se=FALSE ) +
    theme_bw() + ylab(paste0(phenoName," (normalized expression)")) + xlab("ECAR (pMol/min)") +
    scale_color_manual(values = c(mutated = "red", unmutated = "black", unknown = "grey80")) + ggtitle(paste0(formatSea(seaName)," (M-CLL samples only)")) +
    annotate("text", x = -Inf, y = Inf, label = annoText, 
             vjust=1, hjust=0, size = 5, parse = TRUE, col= "darkred") +
    theme(panel.grid = element_blank(),
          axis.title = element_text(size=13, face="bold"),
          axis.text = element_text(size=12),
          plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
         legend.position = c(0.9,0.1),
          axis.title.x = element_text(face="bold"),
          plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))
})

grid.arrange(grobs = c(pList.U,pList.M), ncol=2)

12 Organize a figure for the clinical part

figOut <- c(osKm$glycolytic.capacity, osKm$glycolytic.reserve, pList.all[[1]],pList.all[[2]])
title = ggdraw() + draw_figure_label("Figure 5", fontface = "bold", position = "top.left",size=20)
p<-plot_grid(osKm$glycolytic.capacity, osKm$glycolytic.reserve,
          pList.all[[1]],
          pList.all[[2]],
          labels = c("A","B","C","D"), label_size = 22)
plot_grid(title, p, rel_heights = c(0.05,0.95), ncol = 1)


13 Multivairate analysis explaining heterogeneity in Seahorse data

13.1 Building multi-variate models that predict CLL bioenergetic features

Loading the data.

data(list=c("conctab", "drpar", "drugs", "patmeta", "lpdAll", "dds", "mutCOM",
"methData","seaCombat","pretreat"))

13.1.1 Data pre-processing

For gene expression and methylation data

#only consider CLL patients
CLLPatients<-rownames(patmeta)[which(patmeta$Diagnosis=="CLL")]

e<-dds
colnames(e)<-colData(e)$PatID

#Methylation Data
methData = t(assay(methData))
methPCA <- prcomp(methData, center = T, scale. = TRUE)$x[,1:20]

#RNA Data
eCLL<-e[,colnames(e) %in% CLLPatients]

#filter out genes without gene name
eCLL<-eCLL[(!rowData(eCLL)$symbol %in% c("",NA)),]

#filter out low count genes
###
minrs <- 100
rs  <- rowSums(assay(eCLL))
eCLL<-eCLL[ rs >= minrs, ]

#variance stabilize the data
vstCounts<-varianceStabilizingTransformation(eCLL)
vstCounts<-assay(vstCounts)

#filter out low variable genes
ntop<-5000
vstCountsFiltered<-vstCounts[order(apply(vstCounts, 1, var, na.rm=T),
                                   decreasing = T)[1:ntop],]
eData<-t(vstCountsFiltered)

#Prepare PCA
pcRes <- prcomp(eData, center = T, scale. = TRUE)
rnaPCA <- pcRes$x[,1:20]
pcLoad <- pcRes$rotation[,1:20]

For genetic data

#genetics
mutCOMbinary<-channel(mutCOM, "binary")
mutCOMbinary<-mutCOMbinary[featureNames(mutCOMbinary) %in% colnames(seaCombat),]
genData<-Biobase::exprs(mutCOMbinary)
idx <- which(colnames(genData) %in% c("del13q14_bi", "del13q14_mono"))
genData <- genData[,-idx]
colnames(genData)[which(colnames(genData)=="del13q14_any")] = "del13q14"

#remove gene with higher than 20% missing values
genData <- genData[,colSums(is.na(genData))/nrow(genData) <= 0.2]

#fill the missing value with majority
genData <- apply(genData, 2, function(x) {
  xVec <- x
  avgVal <- mean(x,na.rm= TRUE)
  if (avgVal >= 0.5) {
    xVec[is.na(xVec)] <- 1
  } else xVec[is.na(xVec)] <- 0
  xVec
})

For IGHV and methylation cluster

#IGHV
translation <- c(`U` = 0, `M` = 1)
stopifnot(all(patmeta$IGHV %in% c("U","M", NA)))
IGHVData <- matrix(translation[patmeta$IGHV], 
                   dimnames = list(rownames(patmeta), "IGHV"), ncol = 1)
IGHVData<-IGHVData[rownames(IGHVData) %in% CLLPatients,,drop=F]
#remove patiente with NA IGHV status
IGHVData<-IGHVData[!is.na(IGHVData), ,drop=F]

# Methylation cluster
translation <- c(`HP` = 2, `IP` = 1, `LP` = 0)
Mcluster <- matrix(translation[patmeta$ConsClust],
                   dimnames = list(rownames(patmeta), "ConsCluster"), ncol = 1)
Mcluster <- Mcluster[rownames(Mcluster) %in% CLLPatients,,drop=F]
Mcluster <- Mcluster[!is.na(Mcluster), ,drop=F]

For demographic and clinical data

#demographics (age and sex)
patmeta<-subset(patmeta, Diagnosis=="CLL")
gender <- ifelse(patmeta[,"Gender"]=="m",0,1)


# impute missing values in age by mean
ImputeByMean <- function(x) {x[is.na(x)] <- mean(x, na.rm=TRUE); return(x)}
age<-ImputeByMean(patmeta[,"Age4Main"])


demogrData <- cbind(age=age,gender=gender)
rownames(demogrData) <- rownames(patmeta)

#Pretreatment
pretreated<- pretreat

For drug response data

#Remove bad drugs. Bortezomib lost its activity during storage. The data for this drug and NSC 74859 were discarded from further analysis.
badrugs = c("D_008", "D_025") 
lpdCLL <- lpdAll[!fData(lpdAll)$id %in% badrugs, pData(lpdAll)$Diagnosis == "CLL"]

# get drug responsee data
get.drugresp <- function(lpd) {
  drugresp = t(Biobase::exprs(lpd[fData(lpd)$type == 'viab'])) %>%
    tbl_df %>% dplyr::select(-ends_with(":5")) %>%
    dplyr::mutate(ID = colnames(lpd)) %>%
    tidyr::gather(drugconc, viab, -ID) %>%
    dplyr::mutate(drug = drugs[substring(drugconc, 1, 5), "name"],
           conc = sub("^D_([0-9]+_)", "", drugconc)) %>%
    dplyr::mutate(conc = as.integer(gsub("D_CHK_", "", conc)))
  
  drugresp
}
drugresp <- get.drugresp(lpdCLL)

#Use median polish to summarise drug response of the five concentrations
get.medp <- function(drugresp) {
  tab = drugresp %>% group_by(drug, conc) %>% 
    do(data.frame(v = .$viab, ID = .$ID)) %>% spread(ID, v)
  
  med.p = lapply(unique(tab$drug), function(n) {
    tb = filter(tab, drug == n) %>% ungroup() %>% 
      dplyr::select(-(drug:conc)) %>% 
      as.matrix %>% `rownames<-`(1:5)
    mdp = stats::medpolish(tb, trace.iter = FALSE)
    df = as.data.frame(mdp$col) + mdp$overall
    colnames(df) <- n
    df
  }) %>% bind_cols()
  
  
  
  medp.viab = tbl_df(med.p) %>% mutate(ID = colnames(tab)[3:ncol(tab)]) %>%
    gather(drug, viab, -ID) 
  medp.viab
}
drugresp.mp <- get.medp(drugresp)
viabData <- dcast(drugresp.mp, ID ~ drug, value.var = "viab" )
rownames(viabData) <- viabData$ID
viabData$ID <- NULL

Prepare seahorse meaurement (response vector)

sea <- t(assays(seaCombat)$seaMedian)

Function to Generate the explanatory dataset for each seahorse measurements

#function to generate response vector and explainatory variable for each seahorse measurement

generateData <- function(inclSet, onlyCombine = FALSE, censor = NULL, robust = FALSE) {
    
    dataScale <- function(x, censor = NULL, robust = FALSE) {
        #function to scale different variables
        if (length(unique(na.omit(x))) == 2){
          #a binary variable, change to -0.5 and 0.5 for 1 and 2
          x - 0.5
        } else if (length(unique(na.omit(x))) == 3) {
          #catagorical varialbe with 3 levels, methylation_cluster, change to -0.5,0,0.5
          (x - 1)/2
        } else {
          if (robust) {
          #continuous variable, centered by median and divied by 2*mad
          mScore <- (x-median(x,na.rm=TRUE))/(1.4826*mad(x,na.rm=TRUE))
            if (!is.null(censor)) {
              mScore[mScore > censor] <- censor
              mScore[mScore < -censor] <- -censor
            }
          mScore/2
          } else {
            mScore <- (x-mean(x,na.rm=TRUE))/(sd(x,na.rm=TRUE))
              if (!is.null(censor)) {
                mScore[mScore > censor] <- censor
                mScore[mScore < -censor] <- -censor
              }
          mScore/2
          }
        }
      }
    
    
    
    allResponse <- list()
    allExplain <- list()

    for (measure in colnames(inclSet$seahorse)) {
      y <- inclSet$seahorse[,measure]
      y <- y[!is.na(y)]
      
      #get overlapped samples for each dataset 
      overSample <- names(y)
      
      for (eachSet in inclSet) {
        overSample <- intersect(overSample,rownames(eachSet))
      }
      
      y <- dataScale(y[overSample], censor = censor, robust = robust)
      
      #generate explainatory variable table for each seahorse measurement
      expTab <- list()
      
      if ("drugs" %in% names(inclSet)) {
        viabTab <- inclSet$drugs[overSample,]
        vecName <- sprintf("drugs(%s)",ncol(viabTab))
        colnames(viabTab) <- paste0("con.",colnames(viabTab))
        expTab[[vecName]] <- apply(viabTab,2,dataScale,censor = censor, robust = robust)
      }
      
      if ("gen" %in% names(inclSet)) {
        geneTab <- inclSet$gen[overSample,]
        #at least 3 mutated sample
        geneTab <- geneTab[, colSums(geneTab) >= 3]
        vecName <- sprintf("genetic(%s)", ncol(geneTab))
        expTab[[vecName]] <- apply(geneTab,2,dataScale)
      }
      
      
      if ("RNA" %in% names(inclSet)){
        
        #for PCA
        rnaPCA <- inclSet$RNA[overSample, ]
        colnames(rnaPCA) <- paste0("con.expression",colnames(rnaPCA), sep = "")
        expTab[["expression(20)"]] <- apply(rnaPCA,2,dataScale, censor = censor, robust = robust)
        
      }
        
      
      if ("meth" %in% names(inclSet)){
        methPCA <- inclSet$meth[overSample,]
        colnames(methPCA) <- paste("con.methylation",colnames(methPCA),sep = "")
        expTab[["methylation(20)"]] <- apply(methPCA[,1:20],2,dataScale, censor = censor, robust = robust)
      }
      
      if ("IGHV" %in% names(inclSet)) {
        IGHVtab <- inclSet$IGHV[overSample,,drop=FALSE]
        expTab[["IGHV(1)"]] <- apply(IGHVtab,2,dataScale)
      }
      
      if ("Mcluster" %in% names(inclSet)) {
        methTab <- inclSet$Mcluster[overSample,,drop=FALSE]
        colnames(methTab) <- "methylation cluster"
        expTab[["methCluster(1)"]] <- apply(methTab,2,dataScale)
      }
      
      if ("demographics" %in% names(inclSet)){
        demoTab <- inclSet$demographics[overSample,]
        vecName <- sprintf("demographics(%s)", ncol(demoTab))
        expTab[[vecName]] <- apply(demoTab,2,dataScale)
      }
      
      if ("pretreated" %in% names(inclSet)){
        preTab <- inclSet$pretreated[overSample,,drop=FALSE]
        expTab[["pretreated(1)"]] <- apply(preTab,2,dataScale)
      }
      
      comboTab <- c()
      for (eachSet in names(expTab)){
        comboTab <- cbind(comboTab, expTab[[eachSet]])
      }
      vecName <- sprintf("all(%s)", ncol(comboTab))
      expTab[[vecName]] <- comboTab
      
      measureName <- sprintf("%s(%s)",formatSea(measure),length(y))
      allResponse[[measureName]] <- y
      allExplain[[measureName]] <- expTab
    }
  if (onlyCombine) {
    #only return combined results, for feature selection
    allExplain <- lapply(allExplain, function(x) x[length(x)])
  }
    
  return(list(allResponse=allResponse, allExplain=allExplain))

}

13.1.2 Calulate bioenergetic variance explained by multi-omics data set

13.1.2.1 Training models

Clean and integrate multi-omics data

inclSet<-list(RNA=rnaPCA, meth=methPCA, gen=genData, IGHV=IGHVData,
            demographics=demogrData, drugs=viabData, pretreated=pretreated, seahorse = sea)
cleanData <- generateData(inclSet, censor = 4)

Function for multi-variate regression

runGlm <- function(X, y, method = "ridge", repeats=20, folds = 3) {
  modelList <- list()
  lambdaList <- c()
  varExplain <- c()
  coefMat <- matrix(NA, ncol(X), repeats)
  rownames(coefMat) <- colnames(X)

  if (method == "lasso"){
    alpha = 1
  } else if (method == "ridge") {
    alpha = 0
  }
  
  for (i in seq(repeats)) {
    if (ncol(X) > 2) {
      res <- cv.glmnet(X,y, type.measure = "mse", family="gaussian", 
                       nfolds = folds, alpha = alpha, standardize = FALSE)
      lambdaList <- c(lambdaList, res$lambda.min)
      modelList[[i]] <- res
      
      coefModel <- coef(res, s = "lambda.min")[-1] #remove intercept row
      coefMat[,i] <- coefModel
      
      #calculate variance explained
      y.pred <- predict(res, s = "lambda.min", newx = X)
      varExp <- cor(as.vector(y),as.vector(y.pred))^2
      varExplain[i] <- ifelse(is.na(varExp), 0, varExp) 
      
    } else {
      fitlm<-lm(y~., data.frame(X))
      varExp <- summary(fitlm)$r.squared
      varExplain <- c(varExplain, varExp)
      
    }

  }
  list(modelList = modelList, lambdaList = lambdaList, varExplain = varExplain, coefMat = coefMat)
}

Perform lasso regression

lassoResults <- list()
for (eachMeasure in names(cleanData$allResponse)) {
  dataResult <- list()
  for (eachDataset in names(cleanData$allExplain[[eachMeasure]])) {
    y <- cleanData$allResponse[[eachMeasure]]
    X <- cleanData$allExplain[[eachMeasure]][[eachDataset]]
  
   
    glmRes <- runGlm(X, y, method = "lasso", repeats = 100, folds = 3)
    dataResult[[eachDataset]] <- glmRes 
  }
  lassoResults[[eachMeasure]] <- dataResult
  
}

13.1.2.2 Ploting results

Function for plotting variance explained for each measurement

Show the plot

grid.arrange(grobs = varList, ncol = 4)

13.1.3 Using LASSO model to select important features

13.1.3.1 Training models

Prepare clean data for feature selection

inclSet<-list(RNA=rnaPCA, gen=genData, IGHV=IGHVData, 
              drugs=viabData, seahorse = sea)
cleanData <- generateData(inclSet, onlyCombine = TRUE, censor = 4)

Perform lasso regression

lassoResults <- list()
for (eachMeasure in names(cleanData$allResponse)) {
  dataResult <- list()
  for (eachDataset in names(cleanData$allExplain[[eachMeasure]])) {
    y <- cleanData$allResponse[[eachMeasure]]
    X <- cleanData$allExplain[[eachMeasure]][[eachDataset]]
  
   
    glmRes <- runGlm(X, y, method = "lasso", repeats = 100, folds = 3)
    dataResult[[eachDataset]] <- glmRes 
  }
  lassoResults[[eachMeasure]] <- dataResult
  
}

13.1.4 Ploting results

Function for the heatmap plot

lassoPlot <- function(lassoOut, cleanData, freqCut = 1, coefCut = 0.01, setNumber = "last") {
  plotList <- list()
  if (setNumber == "last") {
    setNumber <- length(lassoOut[[1]])
  } else {
    setNumber <- setNumber
  }
  for (seaName in names(lassoOut)) {
    #for the barplot on the left of the heatmap
    barValue <- rowMeans(lassoOut[[seaName]][[setNumber]]$coefMat)
    freqValue <- rowMeans(abs(sign(lassoOut[[seaName]][[setNumber]]$coefMat)))
    barValue <- barValue[abs(barValue) >= coefCut & freqValue >= freqCut] # a certain threshold
    barValue <- barValue[order(barValue)]
    if(length(barValue) == 0) {
      plotList[[seaName]] <- NA
      next
    }

    #for the heatmap and scatter plot below the heatmap
    allData <- cleanData$allExplain[[seaName]][[setNumber]]
    seaValue <- cleanData$allResponse[[seaName]]*2 #back to Z-score
    
    tabValue <- allData[, names(barValue),drop=FALSE]
    ord <- order(seaValue)
    seaValue <- seaValue[ord]
    tabValue <- tabValue[ord, ,drop=FALSE]
    sampleIDs <- rownames(tabValue)
    tabValue <- as.tibble(tabValue)
    
    #change scaled binary back to catagorical
    for (eachCol in colnames(tabValue)) {
      if (strsplit(eachCol, split = "[.]")[[1]][1] != "con") {
        tabValue[[eachCol]] <- as.integer(as.factor(tabValue[[eachCol]]))
      }
      else {
        tabValue[[eachCol]] <- tabValue[[eachCol]]*2 #back to Z-score
      }
    }
    
    tabValue$Sample <- sampleIDs
    #Mark different rows for different scaling in heatmap
    matValue <- gather(tabValue, key = "Var",value = "Value", -Sample)
    matValue$Type <- "mut"
    
    #For continuious value
    matValue$Type[grep("con.",matValue$Var)] <- "con"
    
    #for methylation_cluster
    matValue$Type[grep("ConsCluster",matValue$Var)] <- "meth"
    
    #change the scale of the value, let them do not overlap with each other
    matValue[matValue$Type == "mut",]$Value = matValue[matValue$Type == "mut",]$Value + 10
    matValue[matValue$Type == "meth",]$Value = matValue[matValue$Type == "meth",]$Value + 20
    
    
    #color scale for viability
    idx <- matValue$Type == "con"
    
    myCol <- colorRampPalette(c('dark red','white','dark blue'), 
                   space = "Lab")
    if (sum(idx) != 0) {
      matValue[idx,]$Value = round(matValue[idx,]$Value,digits = 2)
      minViab <- min(matValue[idx,]$Value)
      maxViab <- max(matValue[idx,]$Value)
      limViab <- max(c(abs(minViab), abs(maxViab)))
      scaleSeq1 <- round(seq(-limViab, limViab,0.01), digits=2)
      color4viab <- setNames(myCol(length(scaleSeq1+1)), nm=scaleSeq1)
    } else {
      scaleSeq1 <- round(seq(0,1,0.01), digits=2)
      color4viab <- setNames(myCol(length(scaleSeq1+1)), nm=scaleSeq1)
    }
    

    
    #change continues measurement to discrete measurement
    matValue$Value <- factor(matValue$Value,levels = sort(unique(matValue$Value)))
    
    #change order of heatmap
    names(barValue) <-  gsub("con.", "", names(barValue))
    matValue$Var <- gsub("con.","",matValue$Var)
    matValue$Var <- factor(matValue$Var, levels = names(barValue))
    matValue$Sample <- factor(matValue$Sample, levels = names(seaValue))
    #plot the heatmap
    p1 <- ggplot(matValue, aes(x=Sample, y=Var)) + geom_tile(aes(fill=Value), color = "gray") + 
      theme_bw() + scale_y_discrete(expand=c(0,0)) + theme(axis.text.y=element_text(hjust=0, size=10), axis.text.x=element_blank(), axis.ticks=element_blank(), panel.border=element_rect(colour="gainsboro"),  plot.title=element_text(size=12), panel.background=element_blank(), panel.grid.major=element_blank(), panel.grid.minor=element_blank()) + xlab("patients") + ylab("") + scale_fill_manual(name="Mutated", values=c(color4viab, `11`="gray96", `12`='black', `21`='lightgreen', `22`='green',`23` = 'green4'),guide=FALSE) + ggtitle(seaName)
    
    #Plot the bar plot on the left of the heatmap
    barDF = data.frame(barValue, nm=factor(names(barValue),levels=names(barValue)))
    
    p2 <- ggplot(data=barDF, aes(x=nm, y=barValue)) + 
      geom_bar(stat="identity", fill="lightblue", colour="black", position = "identity", width=.66, size=0.2) + 
      theme_bw() + geom_hline(yintercept=0, size=0.3) + scale_x_discrete(expand=c(0,0.5)) + 
      scale_y_continuous(expand=c(0,0)) + coord_flip(ylim=c(min(barValue),max(barValue))) + 
      theme(panel.grid.major=element_blank(), panel.background=element_blank(), axis.ticks.y = element_blank(),
            panel.grid.minor = element_blank(), axis.text=element_text(size=8), panel.border=element_blank()) +
      xlab("") + ylab("") + geom_vline(xintercept=c(0.5), color="black", size=0.6)
    
    #Plot the scatter plot under the heatmap
    
    # scatterplot below
    scatterDF = data.frame(X=factor(names(seaValue), levels=names(seaValue)), Y=seaValue)
    
    p3 <- ggplot(scatterDF, aes(x=X, y=Y)) + geom_point(shape=21, fill="dimgrey", colour="black", size=1.2) + theme_bw() + theme(panel.grid.minor=element_blank(), panel.grid.major.x=element_blank(), axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y=element_text(size=8), panel.border=element_rect(colour="dimgrey", size=0.1), panel.background=element_rect(fill="gray96"))
    
    #Scale bar for continuous variable

    Vgg = ggplot(data=data.frame(x=1, y=as.numeric(names(color4viab))), aes(x=x, y=y, color=y)) + geom_point() + 
      scale_color_gradientn(name="Z-score", colours =color4viab) + theme(legend.title=element_text(size=12), legend.text=element_text(size=10))
    
    #Assemble all the plots togehter

    # construct the gtable
    wdths = c(1.5, 0.2, 1.3*ncol(matValue), 1.4, 1)
    hghts = c(0.1, 0.0015*nrow(matValue), 0.16, 0.5)
    gt = gtable(widths=unit(wdths, "in"), heights=unit(hghts, "in"))
    
    ## make grobs
    gg1 = ggplotGrob(p1)
    gg2 = ggplotGrob(p2)
    gg3 = ggplotGrob(p3)
    gg4 = ggplotGrob(Vgg)
    
    ## fill in the gtable
    gt = gtable_add_grob(gt, gtable_filter(gg2, "panel"), 2, 1) # add barplot
    gt = gtable_add_grob(gt, gtable_filter(gg1, "panel"), 2, 3) # add heatmap
    gt = gtable_add_grob(gt, gtable_filter(gg1, "title"), 1, 3) #add title to plot
    gt = gtable_add_grob(gt, gtable_filter(gg3, "panel"), 4, 3) # add scatterplot
    gt = gtable_add_grob(gt, gtable_filter(gg2, "axis-b"), 3, 1) # y axis for barplot
    gt = gtable_add_grob(gt, gtable_filter(gg3, "axis-l"), 4, 2) # y axis for scatter plot
    gt = gtable_add_grob(gt, gtable_filter(gg1, "axis-l"), 2, 4) # variable names
    gt = gtable_add_grob(gt, gtable_filter(gg4, "guide-box"), 2, 5) # scale bar for continous variables

    
    #plot
    #grid.draw(gt)
    plotList[[seaName]] <- gt
  }
  return(plotList)
}

Plot all heatmaps

heatMaps <- lassoPlot(lassoResults, cleanData, freqCut = 0.8)

Arrange for the main figure (Figure 7)

title = ggdraw() + draw_figure_label("Figure 6", fontface = "bold", position = "top.left",size=20)
p <- ggdraw() + 
  draw_plot(varList[[1]], 0,0.6,0.25,0.4) + 
  draw_plot(varList[[4]], 0.25,0.6,0.25,0.4) + 
  draw_plot(varList[[6]], 0.5,0.6,0.25,0.4) + 
  draw_plot(varList[[7]], 0.75,0.6,0.25,0.4) +
  draw_plot(heatMaps[[1]], 0, 0, 1, 0.3 ) +
  draw_plot(heatMaps[[6]], 0, 0.3, 1, 0.3 ) + 
  draw_plot_label(c("A","B"), c(0,0), c(1, 0.6),size=20)
plot_grid(title, p, rel_heights = c(0.05,0.95), ncol = 1)

Plot other feature for supplementary file

allList <- list(varList[[2]], heatMaps[[2]],
                varList[[4]], heatMaps[[4]],
                varList[[5]], heatMaps[[5]],
                varList[[7]], heatMaps[[7]],
                varList[[8]], heatMaps[[8]],
                varList[[9]], heatMaps[[9]],
                varList[[11]], heatMaps[[11]])

plot_grid(plotlist = allList, rel_widths = c(1,4,1,4),ncol = 4)                

13.2 Charactersing bioligical meanings of expression PCs

Prepare signature sets

gmts = list(H=system.file("extdata","h.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"),
            KEGG=system.file("extdata","c2.cp.kegg.v5.1.symbols.gmt", package = "BloodCancerMultiOmics2017"))

Enrichment using HALLMARK

plotPC <- colnames(pcLoad)[c(2,3, 4, 6, 8,10,11,15)]
enHallmark <- lapply(plotPC, function(pc) {
  inputTab <- data.frame(ID = rowData(dds[rownames(pcLoad),])$symbol,
                         stat = pcLoad[,pc]) %>%
    arrange(abs(stat)) %>% distinct(ID, .keep_all = TRUE) %>%
    column_to_rownames("ID")
  res <- runGSEA(inputTab = inputTab, gmtFile = gmts$H, GSAmethod = "page")
  res$Name <- gsub("HALLMARK_","", res$Name)
  res
}) 
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
names(enHallmark) <- sapply(plotPC, 
                            function(x) paste("expression",x, collapse = ""))
plotHallmark1 <- jyluMisc::plotEnrichmentBar(enHallmark[1:4], pCut = 0.05, setName = "", ifFDR = TRUE)
plotHallmark2 <- jyluMisc::plotEnrichmentBar(enHallmark[5:8], pCut = 0.05, setName = "", ifFDR = TRUE)

#save to pdf manually
ggdraw() +
  draw_plot(plotHallmark1, 0,0,0.5,1) +
  draw_plot(plotHallmark2, 0.5,0.4,0.5,0.6)

Enrichment using KEGG

plotPC <- colnames(pcLoad)[c(2,3, 4, 6, 8,10,11,15)]
enHallmark <- lapply(plotPC, function(pc) {
  inputTab <- data.frame(ID = rowData(dds[rownames(pcLoad),])$symbol,
                         stat = pcLoad[,pc]) %>%
    arrange(abs(stat)) %>% distinct(ID, .keep_all = TRUE) %>%
    column_to_rownames("ID")
  res <- runGSEA(inputTab = inputTab, gmtFile = gmts$KEGG, GSAmethod = "page")
  res$Name <- gsub("KEGG_","", res$Name)
  res
}) 
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
names(enHallmark) <- sapply(plotPC, 
                            function(x) paste("expression",x, collapse = ""))
plotHallmark1 <- jyluMisc::plotEnrichmentBar(enHallmark[1:4], pCut = 0.05, setName = "", ifFDR = TRUE)
plotHallmark2 <- jyluMisc::plotEnrichmentBar(enHallmark[5:8], pCut = 0.05, setName = "", ifFDR = TRUE)
## [1] "No sets passed the criteria"
## [1] "No sets passed the criteria"
#save to pdf manually
ggdraw() +
  draw_plot(plotHallmark1, 0,0,0.5,1) +
  draw_plot(plotHallmark2, 0.5,0.4,0.5,0.6)


14 End of session

sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS  10.14.1
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2                  forcats_0.3.0                  
##  [3] stringr_1.3.1                   dplyr_0.7.8                    
##  [5] purrr_0.2.5                     readr_1.3.1                    
##  [7] tidyr_0.8.2                     tibble_2.0.1                   
##  [9] tidyverse_1.2.1                 gtable_0.2.0                   
## [11] reshape2_1.4.3                  RColorBrewer_1.1-2             
## [13] glmnet_2.0-16                   foreach_1.4.4                  
## [15] Matrix_1.2-15                   survminer_0.4.3                
## [17] ggpubr_0.2                      magrittr_1.5                   
## [19] maxstat_0.7-25                  survival_2.43-3                
## [21] DESeq2_1.20.0                   robustbase_0.93-3              
## [23] limma_3.36.5                    GEOquery_2.48.0                
## [25] ggrepel_0.8.0.9000              pheatmap_1.0.12                
## [27] genefilter_1.62.0               gridExtra_2.3                  
## [29] piano_1.20.1                    cowplot_0.9.4                  
## [31] xtable_1.8-3                    ggbeeswarm_0.6.0               
## [33] ggplot2_3.1.0                   SummarizedExperiment_1.10.1    
## [35] DelayedArray_0.6.6              BiocParallel_1.14.2            
## [37] matrixStats_0.54.0              Biobase_2.40.0                 
## [39] GenomicRanges_1.32.7            GenomeInfoDb_1.16.0            
## [41] IRanges_2.14.12                 S4Vectors_0.18.3               
## [43] BiocGenerics_0.26.0             BloodCancerMultiOmics2017_1.0.2
## [45] seahorseCLL_1.0.0               BiocStyle_2.8.2                
## 
## loaded via a namespace (and not attached):
##   [1] tidyselect_0.2.5       RSQLite_2.1.1          AnnotationDbi_1.42.1  
##   [4] htmlwidgets_1.3        devtools_2.0.1         munsell_0.5.0         
##   [7] preprocessCore_1.42.0  codetools_0.2-16       withr_2.1.2           
##  [10] colorspace_1.4-0       BiocInstaller_1.30.0   knitr_1.21            
##  [13] rstudioapi_0.9.0       labeling_0.3           slam_0.1-44           
##  [16] GenomeInfoDbData_1.1.0 KMsurv_0.1-5           bit64_0.9-7           
##  [19] rprojroot_1.3-2        TH.data_1.0-10         generics_0.0.2        
##  [22] xfun_0.4               sets_1.0-18            R6_2.3.0              
##  [25] locfit_1.5-9.1         bitops_1.0-6           fgsea_1.6.0           
##  [28] assertthat_0.2.0       scales_1.0.0           multcomp_1.4-8        
##  [31] nnet_7.3-12            beeswarm_0.2.3         affy_1.58.0           
##  [34] processx_3.2.1         sandwich_2.5-0         rlang_0.3.1           
##  [37] cmprsk_2.2-7           splines_3.5.0          lazyeval_0.2.1        
##  [40] acepack_1.4.1          broom_0.5.1            checkmate_1.9.1       
##  [43] abind_1.4-5            yaml_2.2.0             modelr_0.1.2          
##  [46] backports_1.1.3        Hmisc_4.1-1            tools_3.5.0           
##  [49] relations_0.6-8        usethis_1.4.0          bookdown_0.9          
##  [52] affyio_1.50.0          gplots_3.0.1           ggdendro_0.1-20       
##  [55] sessioninfo_1.1.1      Rcpp_1.0.0             plyr_1.8.4            
##  [58] base64enc_0.1-3        zlibbioc_1.26.0        RCurl_1.95-4.11       
##  [61] ps_1.3.0               prettyunits_1.0.2      rpart_4.1-13          
##  [64] zoo_1.8-4              haven_2.0.0            cluster_2.0.7-1       
##  [67] exactRankTests_0.8-29  fs_1.2.6               data.table_1.12.0     
##  [70] openxlsx_4.1.0         mvtnorm_1.0-8          pkgload_1.0.2         
##  [73] hms_0.4.2              evaluate_0.12          XML_3.98-1.16         
##  [76] rio_0.5.16             readxl_1.2.0           testthat_2.0.1        
##  [79] compiler_3.5.0         KernSmooth_2.23-15     crayon_1.3.4          
##  [82] htmltools_0.3.6        Formula_1.2-3          geneplotter_1.58.0    
##  [85] lubridate_1.7.4        DBI_1.0.0              jyluMisc_0.1.5        
##  [88] MASS_7.3-51.1          car_3.0-2              cli_1.0.1             
##  [91] vsn_3.48.1             marray_1.58.0          gdata_2.18.0          
##  [94] bindr_0.1.1            igraph_1.2.2           pkgconfig_2.0.2       
##  [97] km.ci_0.5-2            foreign_0.8-71         xml2_1.2.0            
## [100] annotate_1.58.0        vipor_0.4.5            ipflasso_0.1          
## [103] XVector_0.20.0         drc_3.0-1              rvest_0.3.2           
## [106] callr_3.1.1            digest_0.6.18          rmarkdown_1.11        
## [109] cellranger_1.1.0       fastmatch_1.1-0        survMisc_0.5.5        
## [112] htmlTable_1.13.1       curl_3.3               gtools_3.8.1          
## [115] nlme_3.1-137           jsonlite_1.6           carData_3.0-2         
## [118] desc_1.2.0             pillar_1.3.1           lattice_0.20-38       
## [121] plotrix_3.7-4          httr_1.4.0             DEoptimR_1.0-8        
## [124] pkgbuild_1.0.2         glue_1.3.0             remotes_2.0.2         
## [127] zip_1.0.0              iterators_1.0.10       bit_1.1-14            
## [130] stringi_1.2.4          blob_1.1.1             latticeExtra_0.6-28   
## [133] caTools_1.17.1.1       memoise_1.1.0